예제 #1
0
import ExcelExporter
import Gerenciador
import os

# Ask the user for the search phrase to work with.
search = str(input('Search Phrase: '))
# Build the manager for that phrase (it also receives the current working
# directory) and load the author and article lists through it.
gerenciador = Gerenciador.Gerenciador(search, os.getcwd())
lista_autores = gerenciador.loadAutores()
lista_artigos = gerenciador.loadArtigos()

# The exporter instance is not used afterwards; presumably the export runs
# inside its constructor.
# NOTE(review): the meaning of the `True` argument is not visible here - confirm.
leitor = ExcelExporter.ExcelExporter(search, True, os.getcwd())
print('Done')


예제 #2
0
    def __init__(self, master=None):
        """Assemble the classification window inside *master*.

        Creates the data manager for "amostra.csv", four stacked frames, the
        text area, the "Atual"/"Total" counter labels and one button per
        action, each bound to the matching handler method of this object.
        """
        self.gerenciador = Gerenciador("amostra.csv")
        self.fonte = ("Verdana", "12")

        def new_frame(padded=False):
            # Create a frame under master, optionally padded, and pack it.
            frame = Frame(master)
            if padded:
                frame["padx"] = 20
                frame["pady"] = 5
            frame.pack()
            return frame

        def new_button(parent, label, width, handler):
            # Create a button, bind its handler and pack it to the left.
            button = Button(parent, text=label, font=self.fonte, width=width)
            button["command"] = handler
            button.pack(side=LEFT)
            return button

        # Frames are packed in this order: text, counters, classification
        # buttons, edit buttons.
        self.container1 = new_frame()
        self.container3 = new_frame()
        self.container2 = new_frame(padded=True)
        # Disabled by the author: extra frame for the location buttons.
        #self.container5 = Frame(master)
        #self.container5["padx"] = 20
        #self.container5["pady"] = 5
        #self.container5.pack()
        self.container4 = new_frame(padded=True)

        # Text area in the top frame.
        self.titulo = Text(self.container1, wrap=WORD, width=126)
        self.titulo["font"] = self.fonte
        self.titulo.pack()

        # Counter labels, side by side.
        current_caption = "Atual: " + self.getNumAtual() + " |"
        self.atual = Label(self.container3,
                           text=current_caption,
                           font=self.fonte)
        self.atual.pack(side=LEFT)

        total_caption = "Total: " + self.getTotal()
        self.total = Label(self.container3,
                           text=total_caption,
                           font=self.fonte)
        self.total.pack(side=LEFT)

        self.imprimePost()

        # Classification buttons.
        self.btnF = new_button(self.container2, "Furto", 7, self.setClassF)
        self.btnRV = new_button(self.container2, "Roubo", 7, self.setClassR)
        self.btnTV = new_button(self.container2, "Tentativa", 10,
                                self.setClassT)
        self.btnO = new_button(self.container2, "Outros", 7, self.setClassO)

        # Disabled by the author: location classification buttons.
        #self.btnL = Button(self.container5, text="Com Localização", font=self.fonte, width=14)
        #self.btnL["command"] = self.setClassL
        #self.btnL.pack(side=LEFT)

        #self.btnSL = Button(self.container5, text="Sem Localização", font=self.fonte, width=14)
        #self.btnSL["command"] = self.setClassSL
        #self.btnSL.pack(side=LEFT)

        self.btnP = new_button(self.container2, "Próximo", 7,
                               self.proximoPost)

        # Edit / delete / save buttons in the bottom frame.
        self.btnE = new_button(self.container4, "Editar", 7, self.editar)
        self.btnA = new_button(self.container4, "Apagar", 7, self.excluir)
        self.btnS = new_button(self.container4, "Salvar", 7, self.salvar)
예제 #3
0
    def start_search(self):
        """Crawl Semantic Scholar for ``self.input_search`` and save the results.

        The search is run three times: unfiltered, with the "last five years"
        filter and with the "reviews" filter. On each pass up to
        ``self.input_pages`` result pages are scraped. Every article found is
        merged into ``self.list_articles`` / ``self.list_authors`` (which are
        first loaded through the manager), both lists are handed back to the
        manager for saving, and the GUI is told the search is done.

        The Chrome driver is always closed, even when the crawl fails.
        """
        # Imported locally so this method stays self-contained; selenium is
        # already a dependency of this module.
        from selenium.common.exceptions import (TimeoutException,
                                                WebDriverException)

        self.start_time = Timer.timeNow()

        # loads the article/author lists for the inputted search
        self.manager = Gerenciador.Gerenciador(self.input_search)
        self.list_authors = self.manager.loadAutores()
        self.list_articles = self.manager.loadArtigos()

        # one entry per pass: the data-selenium-selector of the filter button
        # to click after searching, or None for the unfiltered search
        search_filters = (None,
                          'last-five-years-filter-button',
                          'reviews-filter-button')

        # creates a webdriver instance
        driver = webdriver.Chrome(self.directory_chromedriver,
                                  chrome_options=self.options)
        try:
            for k, filter_selector in enumerate(search_filters):
                # label gui
                self.gui.app.queueFunction(
                    self.gui.app.setLabel, 'progress_bar_label',
                    'Crawling with ' + str(k + 1) + '/3 parameter...')
                self.gui.app.queueFunction(self.gui.app.setMeter,
                                           'progress_bar', 0)

                # access Semantic Scholar main page and wait for the
                # Field of Study filter to be present
                driver.get('https://www.semanticscholar.org/')
                self._wait_for_xpath(
                    driver, "//select[@aria-label='Field of study filter']")

                # dismiss the popup that asks to allow cookies, if it shows up
                try:
                    driver.find_element_by_xpath(
                        "//div[@class='copyright-banner__dismiss-btn button button--secondary']"
                    ).click()
                except WebDriverException:
                    # best effort: the banner is optional
                    pass

                # input the desired search phrase in the input box and hits ENTER
                driver.find_element_by_name('q').send_keys(self.input_search)
                driver.find_element_by_name('q').send_keys(Keys.ENTER)

                # waits for the results page to load
                self._wait_for_xpath(
                    driver,
                    "//button[@data-selenium-selector='more-search-filters']")

                # applies the filter that belongs to this pass, if any
                if filter_selector is not None:
                    element = driver.find_element_by_xpath(
                        "//button[@data-selenium-selector='" +
                        filter_selector + "']")
                    driver.execute_script('arguments[0].click()', element)

                # runs the code for the amount of pages desired
                self.index_progress_bar = 1
                for _ in range(self.input_pages):
                    # progress bar
                    self.gui.app.queueFunction(
                        self.gui.app.setMeter, 'progress_bar',
                        (100 * self.index_progress_bar) / self.input_pages)
                    self.index_progress_bar += 1

                    # waits (bounded) until the "is-filtering" marker is gone
                    # instead of spinning on find_element forever
                    try:
                        WebDriverWait(driver, 20).until_not(
                            EC.presence_of_element_located(
                                (By.XPATH,
                                 "//div[@class='result-page is-filtering']")))
                    except TimeoutException:
                        print("~~~~ PAGE DID NOT LOAD! ~~~~")

                    # scrapes every article listed in the page
                    list_articles_in_page = driver.find_elements_by_xpath(
                        "//article[@class='search-result']")
                    for item in list_articles_in_page:
                        new_article, article_authors = self._scrape_article(
                            driver, item)
                        # only new articles are linked to their authors
                        if self._register_article(new_article):
                            for author in article_authors:
                                author.addArtigo(new_article)

                    # tries to go to the next page, if exists
                    try:
                        element = driver.find_element_by_xpath(
                            "//a[@data-selenium-selector='next-page']")
                        driver.execute_script('arguments[0].click()', element)
                    except WebDriverException:
                        print("SUBJECT HAS NO MORE SEARCH PAGES!")
                        break

            self.end_time = Timer.timeNow()
        finally:
            # closes the Google Chrome, also when the crawl raised
            driver.quit()

        # hands the lists of articles and authors to the manager for saving
        self.manager.saveArtigos(self.list_articles)
        self.manager.saveAutores(self.list_authors)

        self.gui.show_search_done_alert(
            Timer.totalTime(self.start_time, self.end_time),
            str(len(self.list_articles)))

    def _wait_for_xpath(self, driver, xpath, timeout=20):
        """Wait until an element matching *xpath* is present.

        Returns the element, or None (after printing a notice) when it does
        not show up within *timeout* seconds. WebDriverWait signals a timeout
        with selenium's TimeoutException, not the builtin TimeoutError.
        """
        from selenium.common.exceptions import TimeoutException

        try:
            return WebDriverWait(driver, timeout).until(
                EC.presence_of_element_located((By.XPATH, xpath)))
        except TimeoutException:
            print("~~~~ PAGE DID NOT LOAD! ~~~~")
            return None

    def _text_or_default(self, item, xpath, default):
        """Return the text of the element under *item* at *xpath*, or *default*."""
        from selenium.common.exceptions import WebDriverException

        try:
            return item.find_element_by_xpath(xpath).text
        except WebDriverException:
            return default

    def _find_or_create_author(self, name, link):
        """Return the known author called *name*, creating it when missing.

        A newly created author is appended to ``self.list_authors`` and the
        list is re-sorted. The whole list is scanned, so the lookup does not
        depend on how authors sort and works for an empty *name*.
        """
        for known in self.list_authors:
            if known.nome == name:
                return known
        author = Autor.Autor(name, link)
        self.list_authors.append(author)
        self.list_authors.sort()
        return author

    def _register_article(self, article):
        """Add *article* to ``self.list_articles`` unless it is already listed.

        Two articles are the same when both link and title match. Returns
        True when the article was added, False when it was a repeat.
        """
        for known in self.list_articles:
            if article.link == known.link and article.titulo == known.titulo:
                return False
        self.list_articles.append(article)
        self.list_articles.sort()
        return True

    def _scrape_article(self, driver, item):
        """Build an article object from one search-result element.

        Returns ``(article, authors)`` where *authors* is the sorted list of
        author objects attached to the article. Optional fields that cannot
        be read keep their placeholder ('-' or '0').
        """
        from selenium.common.exceptions import WebDriverException

        # saves the article title as a string
        title = item.find_element_by_xpath(
            ".//a[@data-selenium-selector='title-link']").text

        # authors with a html link to their pages
        authors = []
        for anchor in item.find_elements_by_xpath(
                ".//a[@class='author-list__link author-list__author-name']"):
            authors.append(
                self._find_or_create_author(anchor.text,
                                            anchor.get_attribute('href')))

        # authors without a html link to their pages
        try:
            unlinked_authors = item.find_elements_by_xpath(
                ".//span[@class='author-list__author-name']")
        except WebDriverException:
            unlinked_authors = []
        for span in unlinked_authors:
            authors.append(self._find_or_create_author(span.text, None))
        authors.sort()

        # origin, year, influence factor and citation velocity as strings
        origin = self._text_or_default(
            item, ".//li[@data-selenium-selector='venue-metadata']", '-')
        date = self._text_or_default(
            item, ".//li[@data-selenium-selector='paper-year']", '0')
        influence = self._text_or_default(
            item,
            ".//li[@data-selenium-selector='search-result-influential-citations']",
            '0').replace(',', '').replace('.', '')
        velocity = self._text_or_default(
            item,
            ".//li[@data-selenium-selector='search-result-citation-velocity']",
            '0').replace(',', '').replace('.', '')

        # saves the article html link as a string
        try:
            link = item.find_element_by_xpath(
                ".//a[@data-selenium-selector='paper-link']"
            ).get_attribute('href')
        except WebDriverException:
            link = '-'

        # opens the cite dialog to read the BibTeX entry and derive the type;
        # both values are set together so a failure half-way leaves both '-'
        cite = '-'
        bibtex = '-'
        bibtex_xpath = "//cite[@class='formatted-citation formatted-citation--style-bibtex']"
        try:
            item.find_element_by_xpath(
                ".//button[@data-selenium-selector='cite-link']").click()
            citation = self._wait_for_xpath(driver, bibtex_xpath)
            if citation is not None:
                raw_bibtex = citation.text
                citation.send_keys(Keys.ESCAPE)
                cite, bibtex = self.return_type_cite(raw_bibtex), raw_bibtex
        except Exception:
            # best effort: articles without a usable cite dialog keep '-'
            pass

        # creates a new instance of a Article object
        new_article = Artigo.Artigo(title, authors, origin, date, influence,
                                    velocity, link, cite, bibtex)
        return new_article, authors
예제 #4
0
    def __init__(self, pesquisa):
        """Export the articles and authors of search *pesquisa* to a workbook.

        The file ``<cwd>/Files/<pesquisa>/<pesquisa>.xlsx`` gets two sheets:
        'ARTIGOS' (one row per article author, article cells merged across
        those rows) and 'AUTORES' (one row per article of the author, author
        cells merged). The data comes from ``Gerenciador.Gerenciador(pesquisa)``.

        Raises:
            FileNotFoundError: when the target folder does not exist.
        """
        diretorio_excel = os.getcwd() + '/Files/' + pesquisa + '/'

        # The target folder must already exist. This used to be enforced by
        # chdir-ing into it and back; an explicit check avoids touching the
        # process-wide working directory.
        if not os.path.isdir(diretorio_excel):
            raise FileNotFoundError(diretorio_excel)

        workbook = xlsxwriter.Workbook(diretorio_excel + pesquisa + '.xlsx')

        worksheet_artigos = workbook.add_worksheet('ARTIGOS')
        worksheet_autores = workbook.add_worksheet('AUTORES')

        # column indexes of the articles sheet
        titulo = 0
        autores = 1
        publicado = 2
        data = 3
        influencia = 4
        velocidade = 5
        link = 6

        primeiraLinha_format = workbook.add_format({
            'bold': True,
            'font_size': '16',
            'align': 'center',
            'bg_color': '#757A79',
            'font_color': 'white',
            'border': 1
        })

        one_line_format = workbook.add_format({
            'bg_color': "#B5E9FF",
            'align': 'center',
            'border': 1
        })

        autor_format = workbook.add_format({
            'bg_color': "#B5E9FF",
            'align': 'center',
            'border': 1,
            'underline': True,
            'font_color': 'blue'
        })

        merge_format = workbook.add_format({
            'align': 'center',
            'valign': 'vcenter',
            'bg_color': "#B5E9FF",
            'border': 1
        })

        def write_link(worksheet, row, col, url, text):
            # Write text as a hyperlink; fall back to a plain cell when the
            # url is missing (e.g. None) or xlsxwriter rejects it.
            if url:
                try:
                    worksheet.write_url(row, col, url, autor_format,
                                        string=text)
                except Exception:
                    url = None
                else:
                    return
            worksheet.write(row, col, text, one_line_format)

        # ---- articles sheet -------------------------------------------
        cabecalho_artigos = (
            (titulo, 'Título do Artigo'),
            (autores, 'Autores'),
            (publicado, 'Origem da Publicação'),
            (data, 'Ano de Publicação'),
            (influencia, 'Fator de Influência'),
            (velocidade, 'Velocidade de Citação'),
            (link, 'Link do artigo'),
        )
        for coluna, texto in cabecalho_artigos:
            worksheet_artigos.write(0, coluna, texto, primeiraLinha_format)
        linha = 1

        gerenciador = Gerenciador.Gerenciador(pesquisa)
        listaDeArtigos = gerenciador.loadArtigos()
        listaDeAutores = gerenciador.loadAutores()

        for artigo in listaDeArtigos:
            primeiraLinha = linha
            campos = (
                (titulo, artigo.titulo),
                (publicado, artigo.publicado_em),
                (data, artigo.data),
                (influencia, artigo.influencia),
                (velocidade, artigo.velocidade),
                (link, artigo.link),
            )
            for coluna, valor in campos:
                worksheet_artigos.write(linha, coluna, valor, one_line_format)

            for autor in artigo.autores:
                write_link(worksheet_artigos, linha, autores, autor.link,
                           autor.nome)
                linha += 1
            if linha == primeiraLinha:
                # no authors: the article still takes one row, otherwise the
                # next article would overwrite it
                worksheet_artigos.write(linha, autores, '', one_line_format)
                linha += 1

            # several author rows: merge the article cells over all of them
            if primeiraLinha != linha - 1:
                for coluna, valor in campos:
                    worksheet_artigos.merge_range(primeiraLinha, coluna,
                                                  linha - 1, coluna, valor,
                                                  merge_format)

        # ---- authors sheet --------------------------------------------
        nome_autor = 0
        link_autor = 1
        artigos_autor = 2

        cabecalho_autores = (
            (nome_autor, 'Nome do Autor'),
            (link_autor, 'Página do Autor'),
            (artigos_autor, 'Artigos publicados relacionados'),
        )
        for coluna, texto in cabecalho_autores:
            worksheet_autores.write(0, coluna, texto, primeiraLinha_format)
        linha = 1

        for autor in listaDeAutores:
            primeiraLinha = linha
            campos = (
                (nome_autor, autor.nome),
                (link_autor, autor.link),
            )
            for coluna, valor in campos:
                worksheet_autores.write(linha, coluna, valor, one_line_format)

            for artigo in autor.artigos:
                write_link(worksheet_autores, linha, artigos_autor,
                           artigo.link, artigo.titulo)
                linha += 1
            if linha == primeiraLinha:
                # no articles: the author still takes one row
                worksheet_autores.write(linha, artigos_autor, '',
                                        one_line_format)
                linha += 1

            # several article rows: merge the author cells over all of them
            if primeiraLinha != linha - 1:
                for coluna, valor in campos:
                    worksheet_autores.merge_range(primeiraLinha, coluna,
                                                  linha - 1, coluna, valor,
                                                  merge_format)

        workbook.close()
예제 #5
0
import ExcelExporter
import Gerenciador

# Build the manager for a hard-coded search phrase and load its author and
# article lists.
gerenciador = Gerenciador.Gerenciador('Texture Analysis Methods')
lista_autores = gerenciador.loadAutores()
lista_artigos = gerenciador.loadArtigos()

# Exporter call left disabled by the author (note that it uses a different
# phrase from the one loaded above).
# leitor = ExcelExporter.ExcelExporter('CNN Object Detection')
print('Pronto')


예제 #6
0
    pass
else:
    os.mkdir('PDFs')

options.add_experimental_option("prefs", {
  "download.default_directory": diretorio_pdf,
  "download.prompt_for_download": False,
  "download.directory_upgrade": True,
  "plugins.always_open_pdf_externally": True,
  "safebrowsing.enabled": True
})

pesquisa = str(input("Entre com sua pesquisa:\n"))
paginas = int(input("Quantas paginas gostaria de pesquisar?\n"))

gerenciador = Gerenciador.Gerenciador(pesquisa)
lista_autores = gerenciador.loadAutores()
lista_artigos = gerenciador.loadArtigos()
normal = False
lastFiveYears = False
litReviews = False

driver = webdriver.Chrome(diretorio_chromedriver, chrome_options=options)
for k in range(0, 3):
    driver.set_page_load_timeout('10')
    driver.get('https://www.semanticscholar.org/')
    driver.find_element_by_name('q').send_keys(pesquisa)
    driver.find_element_by_name('q').send_keys(Keys.ENTER)
    delay(3)

    if normal is False:
    def single_creator(self, search_type):
        diretorio_original = os.getcwd()

        os.chdir(diretorio_original + '/Results/' + self.search_parameter +
                 '/')
        diretorio_excel = diretorio_original + '/Results/' + self.search_parameter + '/'

        workbook = xlsxwriter.Workbook(diretorio_excel +
                                       self.search_parameter + '.xlsx')

        os.chdir(diretorio_original)

        worksheet_artigos = workbook.add_worksheet('ARTICLES')
        worksheet_autores = workbook.add_worksheet('AUTHORS')

        indice = 0
        type = 1
        titulo = 2
        autores = 3
        publicado = 4
        data = 5
        influencia = 6
        velocidade = 7
        optimized = 8
        link = 9
        bibtex = 10
        linha = 0

        label_comment = 'Label NUMBER: 1 -> article\n' \
                        'Label NUMBER: 2 -> conference, inproceedings, proceedings or phdthesis\n' \
                        'Label NUMBER: 3 -> mastersthesis, book, inbook, Incollection or techreport\n' \
                        'Label NUMBER: 4 -> manual, misc or unpublished'

        primeiraLinha_format = workbook.add_format({
            'bold': True,
            'font_size': '16',
            'align': 'center',
            'bg_color': '#757A79',
            'font_color': 'white',
            'border': 1
        })

        one_line_format = workbook.add_format({
            'bg_color': "#B5E9FF",
            'align': 'center',
            'border': 1
        })

        autor_format = workbook.add_format({
            'bg_color': "#B5E9FF",
            'align': 'center',
            'border': 1,
            'underline': True,
            'font_color': 'blue'
        })

        merge_format = workbook.add_format({
            'align': 'center',
            'valign': 'vcenter',
            'bg_color': "#B5E9FF",
            'border': 1
        })

        worksheet_artigos.write(linha, indice, 'Index', primeiraLinha_format)
        worksheet_artigos.write(linha, type, 'Article Type',
                                primeiraLinha_format)
        worksheet_artigos.write_comment(linha, type, label_comment)
        worksheet_artigos.write(linha, titulo, 'Title', primeiraLinha_format)
        worksheet_artigos.write(linha, autores, 'Authors',
                                primeiraLinha_format)
        worksheet_artigos.write(linha, publicado, 'Publication Source',
                                primeiraLinha_format)
        worksheet_artigos.write(linha, data, 'Publication Year',
                                primeiraLinha_format)
        worksheet_artigos.write(linha, influencia, 'Influence Factor',
                                primeiraLinha_format)
        worksheet_artigos.write(linha, velocidade, 'Citation Velocity',
                                primeiraLinha_format)
        worksheet_artigos.write(linha, link, 'Article Link',
                                primeiraLinha_format)
        worksheet_artigos.write(linha, bibtex, 'BibTex', primeiraLinha_format)
        worksheet_artigos.write(linha, optimized, 'Optimized Factor',
                                primeiraLinha_format)
        linha += 1

        gerenciador = Gerenciador.Gerenciador(self.search_parameter)
        listaDeArtigos = gerenciador.loadArtigos()
        listaDeAutores = gerenciador.loadAutores()

        if search_type == 1:
            self.define_alphas()
            self.order_optimized(listaDeArtigos)
            listaDeArtigos = self.ordered_optimized_list
        elif search_type == 2:
            self.order_articles(listaDeArtigos, 1)
            listaDeArtigos = self.ordered_influence_articles_list
        elif search_type == 3:
            self.order_articles(listaDeArtigos, 2)
            listaDeArtigos = self.ordered_velocity_articles_list
        elif search_type == 4:
            self.order_articles(listaDeArtigos, 3)
            listaDeArtigos = self.ordered_date_articles_list
        elif search_type == 5:
            pass

        numeroDoArtigo = 1

        for artigo in listaDeArtigos:
            primeiraLinha = linha
            worksheet_artigos.write(linha, indice, str(numeroDoArtigo),
                                    one_line_format)

            article_label = ''
            if artigo.cite == 'article':
                article_label = '1'
            elif artigo.cite == 'conference' or artigo.cite == 'inproceedings' or artigo.cite == 'proceedings' or \
                    artigo.cite == 'phdthesis':
                article_label = '2'
            elif artigo.cite == 'mastersthesis' or artigo.cite == 'book' or artigo.cite == 'inbook' or \
                    artigo.cite == 'Incollection' or artigo.cite == 'techreport':
                article_label = '3'
            else:
                article_label = '4'

            worksheet_artigos.write(linha, type, article_label,
                                    one_line_format)
            worksheet_artigos.write(linha, titulo, artigo.titulo,
                                    one_line_format)
            worksheet_artigos.write(linha, publicado, artigo.publicado_em,
                                    one_line_format)
            worksheet_artigos.write(linha, data, artigo.data, one_line_format)
            worksheet_artigos.write(linha, influencia, artigo.influencia,
                                    one_line_format)
            worksheet_artigos.write(linha, velocidade, artigo.velocidade,
                                    one_line_format)
            worksheet_artigos.write(linha, optimized, artigo.total_factor,
                                    one_line_format)
            worksheet_artigos.write(linha, link, artigo.link, one_line_format)
            worksheet_artigos.write(linha, bibtex, artigo.bibtex,
                                    one_line_format)
            authors = ''
            for autor in artigo.autores:
                authors += autor.nome + ', '
            authors = authors[:-2]
            worksheet_artigos.write(linha, autores, authors, one_line_format)

            numeroDoArtigo += 1
            linha += 1

        nome_autor = 0
        link_autor = 1
        artigos_autor = 2
        linha = 0

        worksheet_autores.write(linha, nome_autor, 'Author Name',
                                primeiraLinha_format)
        worksheet_autores.write(linha, link_autor, 'Author Page',
                                primeiraLinha_format)
        worksheet_autores.write(linha, artigos_autor,
                                'Related Published Articles',
                                primeiraLinha_format)
        linha += 1

        for autor in listaDeAutores:
            primeiraLinha = linha
            worksheet_autores.write(linha, nome_autor, autor.nome,
                                    one_line_format)
            worksheet_autores.write(linha, link_autor, autor.link,
                                    one_line_format)
            for artigos in autor.artigos:
                try:
                    worksheet_autores.write_url(linha,
                                                artigos_autor,
                                                artigos.link,
                                                autor_format,
                                                string=artigos.titulo)
                except:
                    worksheet_autores.write(linha, artigos_autor,
                                            artigos.titulo, one_line_format)
                finally:
                    linha += 1
            if primeiraLinha != linha - 1:
                worksheet_autores.merge_range(primeiraLinha, nome_autor,
                                              linha - 1, nome_autor,
                                              autor.nome, merge_format)
                worksheet_autores.merge_range(primeiraLinha, link_autor,
                                              linha - 1, link_autor,
                                              autor.link, merge_format)

        workbook.close()

        self.gui.show_saved_alert(diretorio_excel)
예제 #8
0
    def single_creator(self, search_type):
        """Export the saved articles and authors of the current search to Excel.

        The workbook ``<search_parameter>.xlsx`` is written inside
        ``<root_directory>/Results/<search_parameter>`` and holds two sheets:
        ``ARTICLES`` (one row per article) and ``AUTHORS`` (one row per
        article of every author that has at least one article; the name and
        page cells of an author with several articles are merged).  When the
        file has been written, ``self.gui.show_saved_alert`` is called with
        the output folder.

        Args:
            search_type: selects the ordering of the ARTICLES sheet.
                1 uses ``order_optimized``; 2 and 3 use ``order_articles``
                with modes 1 and 2 and read the citation- and date-ordered
                lists respectively; any other value keeps the order returned
                by ``loadArtigos``.
        """
        diretorio_excel = os.path.join(self.root_directory, 'Results',
                                       self.search_parameter)

        # Stepping into the results folder makes a missing folder fail right
        # here, before any work is done; the working directory is then set to
        # the root directory again.
        os.chdir(diretorio_excel)
        workbook = xlsxwriter.Workbook(
            os.path.join(diretorio_excel, self.search_parameter + '.xlsx'))
        os.chdir(self.root_directory)

        worksheet_artigos = workbook.add_worksheet('ARTICLES')
        worksheet_autores = workbook.add_worksheet('AUTHORS')

        # Column positions in the ARTICLES sheet.
        col_index = 0
        col_type = 1
        col_title = 2
        col_authors = 3
        col_source = 4
        col_year = 5
        col_citations = 6
        col_optimized = 7
        col_impact = 8
        col_link = 9
        col_bibtex = 10
        col_synopsis = 11

        label_comment = 'Label NUMBER: 1 -> article\n' \
                        'Label NUMBER: 2 -> conference, inproceedings, proceedings or phdthesis\n' \
                        'Label NUMBER: 3 -> mastersthesis, book, inbook, Incollection or techreport\n' \
                        'Label NUMBER: 4 -> manual, misc or unpublished'

        header_format = workbook.add_format({
            'bold': True,
            'font_size': '16',
            'align': 'center',
            'bg_color': '#757A79',
            'font_color': 'white',
            'border': 1
        })

        one_line_format = workbook.add_format({
            'bg_color': "#B5E9FF",
            'align': 'center',
            'border': 1
        })

        autor_format = workbook.add_format({
            'bg_color': "#B5E9FF",
            'align': 'center',
            'border': 1,
            'underline': True,
            'font_color': 'blue'
        })

        merge_format = workbook.add_format({
            'align': 'center',
            'valign': 'vcenter',
            'bg_color': "#B5E9FF",
            'border': 1
        })

        # ---- ARTICLES sheet: header row (row 0) ----
        article_headers = (
            (col_index, 'Index'),
            (col_type, 'Article Type'),
            (col_title, 'Title'),
            (col_authors, 'Authors'),
            (col_source, 'Publication Source'),
            (col_year, 'Publication Year'),
            (col_citations, 'Citations'),
            (col_link, 'Article Link'),
            (col_bibtex, 'BibTex'),
            (col_optimized, 'Importance Rate'),
            (col_impact, 'Impact Factor'),
            (col_synopsis, 'Synopsis'),
        )
        for column, caption in article_headers:
            worksheet_artigos.write(0, column, caption, header_format)
        # The cell comment explains the numeric labels of the type column.
        worksheet_artigos.write_comment(0, col_type, label_comment)

        gerenciador = Gerenciador.Gerenciador(self.search_parameter,
                                              self.root_directory)
        listaDeArtigos = gerenciador.loadArtigos()
        listaDeAutores = gerenciador.loadAutores()

        # The ordering helpers leave their result in an attribute of self.
        if search_type == 1:
            self.order_optimized(listaDeArtigos)
            listaDeArtigos = self.ordered_optimized_list
        elif search_type == 2:
            self.order_articles(listaDeArtigos, 1)
            listaDeArtigos = self.ordered_citations_articles_list
        elif search_type == 3:
            self.order_articles(listaDeArtigos, 2)
            listaDeArtigos = self.ordered_date_articles_list

        # ---- ARTICLES sheet: one row per article ----
        # Row 0 is the header, so the 1-based article number is also its row.
        for numero, artigo in enumerate(listaDeArtigos, start=1):
            article_row = (
                (col_index, str(numero)),
                (col_type, self.article_label(artigo)),
                (col_title, artigo.titulo),
                (col_source, artigo.publicado_em),
                (col_year, artigo.data),
                (col_citations, artigo.citacoes),
                (col_optimized, artigo.total_factor),
                (col_impact, artigo.impact_factor),
                (col_link, artigo.link),
                (col_bibtex, artigo.bibtex),
                (col_synopsis, artigo.synopsis),
                (col_authors,
                 ', '.join(autor.nome for autor in artigo.autores)),
            )
            for column, value in article_row:
                worksheet_artigos.write(numero, column, value,
                                        one_line_format)

        # ---- AUTHORS sheet ----
        col_author_name = 0
        col_author_page = 1
        col_author_articles = 2

        worksheet_autores.write(0, col_author_name, 'Author Name',
                                header_format)
        worksheet_autores.write(0, col_author_page, 'Author Page',
                                header_format)
        worksheet_autores.write(0, col_author_articles,
                                'Related Published Articles', header_format)

        linha = 1
        for autor in listaDeAutores:
            # Authors without any article get no row at all.
            if not autor.artigos:
                continue

            primeira_linha = linha
            worksheet_autores.write(linha, col_author_name, autor.nome,
                                    one_line_format)
            worksheet_autores.write(linha, col_author_page, autor.link,
                                    one_line_format)

            for artigo in autor.artigos:
                try:
                    worksheet_autores.write_url(linha,
                                                col_author_articles,
                                                artigo.link,
                                                autor_format,
                                                string=artigo.titulo)
                except Exception:
                    # Fall back to the plain title when the link cannot be
                    # written as a hyperlink.  Only ordinary errors are
                    # handled, so Ctrl-C / SystemExit still propagate.
                    worksheet_autores.write(linha, col_author_articles,
                                            artigo.titulo, one_line_format)
                linha += 1

            # More than one article: merge the name/page cells over the rows.
            ultima_linha = linha - 1
            if primeira_linha != ultima_linha:
                worksheet_autores.merge_range(primeira_linha, col_author_name,
                                              ultima_linha, col_author_name,
                                              autor.nome, merge_format)
                worksheet_autores.merge_range(primeira_linha, col_author_page,
                                              ultima_linha, col_author_page,
                                              autor.link, merge_format)

        workbook.close()

        self.gui.show_saved_alert(diretorio_excel)
예제 #9
0
import ExcelExporter
import Gerenciador

# Ask the user for the search phrase to work with.
search = str(input('Search Phrase:'))
# Build the manager for that phrase and load its authors and articles.
# Neither list is used again in this script.
gerenciador = Gerenciador.Gerenciador(search)
lista_autores = gerenciador.loadAutores()
lista_artigos = gerenciador.loadArtigos()

# Instantiate the exporter for the same phrase.
# NOTE(review): presumably the constructor itself performs the Excel export,
# since nothing else is called on it — confirm in ExcelExporter.
leitor = ExcelExporter.ExcelExporter(search)
print('Done')


예제 #10
0
    def _wait_for_xpath(self, driver, xpath, timeout=20):
        """Wait until an element matching ``xpath`` is present in the page.

        A timeout is reported on stdout and otherwise ignored, so the caller
        simply carries on with its next lookup.
        """
        # Function-scope import keeps this helper self-contained; selenium is
        # already required by the code that calls it.
        from selenium.common.exceptions import TimeoutException

        try:
            WebDriverWait(driver, timeout).until(
                EC.presence_of_element_located((By.XPATH, xpath)))
        except (TimeoutException, TimeoutError):
            # WebDriverWait reports expiry with selenium's TimeoutException,
            # which is not related to the builtin TimeoutError; both are
            # accepted so any caller-visible behaviour is kept.
            print("~~~~ PAGE DID NOT LOAD! ~~~~")

    def _apply_result_filter(self, driver, k):
        """Select the result filter that belongs to crawl pass ``k``.

        Pass 1 picks the "last five years" date filter, pass 2 picks the
        "Review" publication type; every other pass leaves the results
        unfiltered.
        """
        if k == 1:  # results from the last five years
            driver.find_element_by_xpath(
                "//button[@class='cl-button cl-button--no-arrow-divider cl-button--not-icon-only cl-button--no-icon cl-button--has-label cl-button--icon-pos-left cl-button--shape-rectangle cl-button--size-default cl-button--type-default cl-dropdown-button cl-dropdown dropdown-filters__dates']"
            ).click()
            element = driver.find_element_by_xpath(
                "//button[@data-selenium-selector='last-five-years-filter-button']"
            )
            driver.execute_script('arguments[0].click()', element)
            # presumably clicking the filter bar closes the dropdown again
            driver.find_element_by_xpath(
                "//div[@class='flex-container flex-row-vcenter dropdown-filters__flex-container']"
            ).click()
        elif k == 2:  # results with Reviews marked
            driver.find_element_by_xpath(
                "//button[@class='cl-button cl-button--no-arrow-divider cl-button--not-icon-only cl-button--no-icon cl-button--has-label cl-button--icon-pos-left cl-button--shape-rectangle cl-button--size-default cl-button--type-default cl-dropdown-button cl-dropdown dropdown-filters__pub_type']"
            ).click()
            driver.find_element_by_xpath(
                "//section[@data-selenium-selector='publicationType']"
            ).click()
            driver.find_element_by_xpath(
                "//*[contains(text(), 'Review (')]").click()
            driver.find_element_by_xpath(
                "//div[@class='flex-container flex-row-vcenter dropdown-filters__flex-container']"
            ).click()

    def _collect_authors(self, item):
        """Return the sorted authors of one search result.

        Every author is also added to ``self.list_authors``, which must be a
        set while the crawl is running.
        """
        # authors with a html link to their pages
        linked = item.find_elements_by_xpath(
            ".//a[@class='author-list__link author-list__author-name']")

        # authors without a html link to their pages
        try:
            unlinked = item.find_elements_by_xpath(
                ".//span[@class='author-list__author-name']")
        except Exception:
            unlinked = []

        # NOTE(review): when an equal Autor is already in self.list_authors
        # the set keeps the older instance, while the list returned here holds
        # the new one — so a later addArtigo lands on the new instance only.
        # Confirm against Autor.__eq__/__hash__ whether that is intended.
        authors_in_article = set()
        for element in linked:
            author = Autor.Autor(element.text, element.get_attribute('href'))
            self.list_authors.add(author)
            authors_in_article.add(author)
        for element in unlinked:
            author = Autor.Autor(element.text, None)
            self.list_authors.add(author)
            authors_in_article.add(author)

        return sorted(authors_in_article)

    def _scrape_result(self, driver, item):
        """Build an ``Artigo`` from one search-result element.

        Returns a ``(article, authors)`` tuple where ``authors`` is the sorted
        author list that was handed to the article.  Apart from the title,
        every field falls back to a placeholder when it cannot be read.
        """

        def optional_text(xpath, default):
            # Best-effort read: a missing element just yields the default.
            try:
                return item.find_element_by_xpath(xpath).text
            except Exception:
                return default

        # saves the article title as a string (a missing title is an error)
        title = item.find_element_by_xpath(
            ".//a[@data-selenium-selector='title-link']").text

        authors_in_article = self._collect_authors(item)

        origin = optional_text(
            ".//span[@data-selenium-selector='venue-metadata']", '-')
        date = optional_text(".//span[@data-heap-id='paper-year']", '0')

        # total citations, with thousands separators stripped
        citations = optional_text(
            ".//li[@data-selenium-selector='search-result-total-citations']",
            '0').replace(',', '').replace('.', '')

        # saves the article html link as a string
        link = '-'
        try:
            link = item.find_element_by_xpath(
                ".//a[@data-selenium-selector='paper-link']"
            ).get_attribute('href')
        except Exception:
            pass

        # saves the article type and its BibTeX entry, read from the
        # citation popup opened by the result's "cite" button
        bibtex_xpath = "//cite[@class='formatted-citation formatted-citation--style-bibtex']"
        cite = '-'
        bibtex = '-'
        try:
            item.find_element_by_xpath(
                ".//button[@data-selenium-selector='cite-link']").click()
            self._wait_for_xpath(driver, bibtex_xpath)

            cite = driver.find_element_by_xpath(bibtex_xpath).get_attribute(
                'textContent')
            # ESCAPE is sent to the citation element once its text is read
            driver.find_element_by_xpath(bibtex_xpath).send_keys(Keys.ESCAPE)
            bibtex = cite
            cite = self.return_type_cite(cite)
        except Exception:
            pass

        # saves the article synopsis as a string
        synopsis = 'No synopsis'
        try:
            item.find_element_by_xpath(
                ".//span[@class='more mod-clickable']").click()
            element = item.find_element_by_xpath(
                ".//span[@class='abstract full-abstract']")
            synopsis = element.text.replace(" Collapse", "")
        except Exception:
            pass

        new_article = Artigo.Artigo(title, authors_in_article, origin, date,
                                    citations, link, cite, bibtex, synopsis)
        return new_article, authors_in_article

    def start_search(self):
        """Crawl Semantic Scholar for ``self.input_search`` and save the results.

        Previously saved articles and authors for the search are loaded
        first.  The search is then run three times in Chrome (unfiltered,
        last five years, reviews only), reading up to ``self.input_pages``
        result pages per pass.  New articles and authors are merged into the
        loaded ones, both lists are saved through the manager and the GUI is
        told the elapsed time and the total number of articles.

        The browser is always closed, even when the crawl fails; in that case
        the exception propagates and nothing is saved.
        """
        self.start_time = Timer.timeNow()

        # loads files for the inputted search if they exist, otherwise, the files are created
        self.manager = Gerenciador.Gerenciador(self.input_search,
                                               self.root_directory)
        self.list_authors = self.manager.loadAutores()
        self.list_articles = self.manager.loadArtigos()

        # creates a webdriver instance
        driver = webdriver.Chrome(self.directory_chromedriver,
                                  chrome_options=self.options)

        try:
            # Sets drop duplicates across passes and pages; they are turned
            # back into lists once the crawl is over.
            self.list_articles = set(self.list_articles)
            self.list_authors = set(self.list_authors)

            # runs the following code 3 times, one for each type of search
            for k in range(3):
                # label gui
                self.gui.app.queueFunction(
                    self.gui.app.setLabel, 'progress_bar_label',
                    'Crawling with ' + str(k + 1) + '/3 parameter...')
                self.gui.app.queueFunction(self.gui.app.setMeter,
                                           'progress_bar', 0)

                # access Semantic Scholar main page and wait for the search
                # box to show up
                driver.get('https://www.semanticscholar.org/')
                self._wait_for_xpath(driver,
                                     "//input[@aria-label='Search text']")

                # dismiss the popup that asks to allow cookies, if it shows up
                try:
                    driver.find_element_by_xpath(
                        "//div[@class='copyright-banner__dismiss-btn button button--secondary']"
                    ).click()
                except Exception:
                    pass

                # input the desired search phrase in the input box and hits ENTER
                driver.find_element_by_name('q').send_keys(self.input_search)
                driver.find_element_by_name('q').send_keys(Keys.ENTER)

                # waits for the page to load. It happens when the number of results is shown
                self._wait_for_xpath(
                    driver, "//div[@class='dropdown-filters__result-count']")

                self._apply_result_filter(driver, k)

                # runs the code for the amount of pages desired
                self.index_progress_bar = 1
                for _ in range(self.input_pages):
                    # progress bar
                    self.gui.app.queueFunction(
                        self.gui.app.setMeter, 'progress_bar',
                        (100 * self.index_progress_bar) / self.input_pages)
                    self.index_progress_bar += 1

                    # waits for the page to load: poll until the "is-filtering"
                    # marker can no longer be found
                    while True:
                        try:
                            driver.find_element_by_xpath(
                                "//div[@class='result-page is-filtering']")
                        except Exception:
                            break

                    # iterates over each article found in the page
                    for item in driver.find_elements_by_xpath(
                            "//article[@class='search-result']"):
                        new_article, authors_in_article = self._scrape_result(
                            driver, item)

                        # The set silently ignores duplicates, so a change in
                        # size tells whether the article is new.  Sizes are
                        # compared by value: identity (`is`) is unreliable for
                        # ints outside the interpreter's small-int cache.
                        before = len(self.list_articles)
                        self.list_articles.add(new_article)
                        if len(self.list_articles) != before:
                            for author in authors_in_article:
                                author.addArtigo(new_article)

                    # tries to go to the next page, if exists
                    try:
                        element = driver.find_element_by_xpath(
                            "//a[@data-selenium-selector='next-page']")
                        driver.execute_script('arguments[0].click()', element)
                    except Exception:
                        print("SUBJECT HAS NO MORE SEARCH PAGES!")
                        break

            self.end_time = Timer.timeNow()
        finally:
            # closes the Google Chrome, also when the crawl raised
            driver.quit()

        # converts the sets back to lists (authors sorted) before saving
        self.list_articles = list(self.list_articles)
        self.list_authors = sorted(self.list_authors)

        # saves the list of articles and authors as .pkl files
        self.manager.saveArtigos(self.list_articles)
        self.manager.saveAutores(self.list_authors)

        self.gui.show_search_done_alert(
            Timer.totalTime(self.start_time, self.end_time),
            str(len(self.list_articles)))