Example #1
0
import os
import requests
from bs4 import BeautifulSoup
import re
import zipfile
import glob
import re
import matplotlib.pyplot as plt
import matplotlib.style as style
# style.available
style.use('fivethirtyeight')

# ------------------------------------------------------------
# Load the list of researcher IDs and names.

df_idlist = readIdList()

# ------------------------------------------------------------
# Run every extraction routine on each researcher's archive; the
# archive name is the value in the first column followed by '.zip'.

extractors = (
    getprojpesqext,
    getprodtec,
    getorient,
    getperiod,
    getlivro,
    getcapit,
    getnomecompleto,
    getdiscip,
)
for researcher_id in df_idlist.iloc[:, 0]:
    zipfilename = str(researcher_id) + '.zip'
    for extract in extractors:
        extract(zipfilename)
Example #2
0
def getgrapho():
    """Draw the journal co-authorship graph between the listed researchers.

    Inputs read from disk:
      * the researcher list returned by ``readIdList()``;
      * ``./config.txt`` -- the text after the first ':' on the lines at
        index 5 and 6 is parsed as the first and last year of the window;
      * ``./csv_producao/projetos_uniq.csv`` (only used to warn about
        projects without an initial year);
      * ``./csv_producao/periodicos_all.csv``;
      * every ``./csv_producao/*fullname.csv`` file.

    Outputs:
      * ``./csv_producao/periodicos_nointer.csv`` with the names of the
        researchers that have no interaction at all;
      * ``./relatorio/figures/grapho.png`` with the drawn graph;
      * warnings printed when projects or papers have no year.

    Raises:
        IndexError: if a researcher ID has no row in the fullname CSVs, or
            if ``config.txt`` has fewer than seven lines.
        KeyError: if no ``*fullname.csv`` file is found.
    """
    df_idlist = readIdList()

    # Year window. float() ignores surrounding whitespace, so a plain
    # strip() is enough to clean the value.
    with open('./config.txt', 'r', encoding='utf-8') as config_file:
        config_lines = config_file.readlines()
    yyi = float(config_lines[5].split(':')[1].strip())
    yyf = float(config_lines[6].split(':')[1].strip())

    separator = '------------------------------------------------------------'

    # ------------------------------------------------------------
    # Data frames produced by the tidy step.
    dfppe_uniq = pd.read_csv('./csv_producao/projetos_uniq.csv', header=0)
    dfpaper = pd.read_csv('./csv_producao/periodicos_all.csv', header=0)
    dfpaper['ID'] = dfpaper['ID'].apply(ss)

    # Projects: only warn about entries whose initial year is missing.
    project_years = dfppe_uniq['YEAR_INI'].replace('VAZIO', -99)
    num_missing = int((project_years == -99).sum())
    if num_missing >= 1:
        print(separator)
        print('ATENCAO: ' + str(num_missing) + ' projetos sem ano inicial')
        print(separator)

    # Papers: warn about missing years, then keep the configured window.
    dfpaper['YEAR'] = dfpaper['YEAR'].replace('VAZIO', -99)
    num_missing = int((dfpaper['YEAR'] == -99).sum())
    if num_missing >= 1:
        print(separator)
        print('ATENCAO: ' + str(num_missing)
              + ' artigos sem ano de publicacao')
        print(separator)
    dfpaper['YEAR'] = dfpaper['YEAR'].apply(ff)
    dfpaper = dfpaper[(dfpaper['YEAR'] >= yyi) & (dfpaper['YEAR'] <= yyf)]

    # ------------------------------------------------------------
    # Personal data (ID in column 0, name in column 1), one CSV per
    # researcher. DataFrame.append no longer exists in pandas >= 2.0, so
    # the frames are concatenated in a single call.
    frames = [pd.read_csv(path, header=0, dtype='str')
              for path in glob.glob('./csv_producao/*fullname.csv')]
    dffullname = (pd.concat(frames, ignore_index=True)
                  if frames else pd.DataFrame())
    # IDs as strings so they can be compared with dfpaper['ID'].
    dffullname['ID'] = dffullname['ID'].apply(ss)

    # ------------------------------------------------------------
    # Interaction between members: for each ordered pair (idd, other),
    # count the papers of `other` whose author list contains the name of
    # `idd`.
    # NOTE: the name taken from the fullname CSV must be the full name,
    # written exactly as it appears in the author lists.
    # NOTE(review): column 7 of periodicos_all.csv is assumed to hold the
    # author list serialized as "['A', 'B', ...]" -- confirm.
    drop_list_chars = str.maketrans('', '', "[]'")
    lsid = []
    lsid_tocompare = []
    lsinter_qtd = []
    for m in range(len(df_idlist)):
        idd = str(df_idlist.iloc[m, 0])
        lname = dffullname[dffullname['ID'] == idd].iloc[0, 1].upper()
        others = dffullname[dffullname['ID'] != idd]
        for idd_tocompare in others.iloc[:, 0]:
            papers = dfpaper[dfpaper['ID'] == idd_tocompare]
            interac = 0
            for raw_authors in papers.iloc[:, 7]:
                authors = raw_authors.upper().translate(drop_list_chars)
                # Only a single leading blank is removed from each name.
                authors = [name[1:] if name.startswith(' ') else name
                           for name in authors.split(',')]
                if lname in authors:
                    interac += 1
            lsid.append(idd)
            lsid_tocompare.append(idd_tocompare)
            lsinter_qtd.append(interac)
    dfinterac = pd.DataFrame({
        'IDD': lsid,
        'IDD_COMP': lsid_tocompare,
        'WEIGHT': lsinter_qtd
    })

    # ------------------------------------------------------------
    # Researchers without any interaction, in either direction.
    lsnointer_period = []
    for m in range(len(df_idlist)):
        idd = str(df_idlist.iloc[m, 0])
        as_source = dfinterac.loc[dfinterac['IDD'] == idd, 'WEIGHT'].sum()
        as_target = dfinterac.loc[dfinterac['IDD_COMP'] == idd,
                                  'WEIGHT'].sum()
        if as_source == 0 and as_target == 0:
            lsnointer_period.append(
                dffullname[dffullname['ID'] == idd].iloc[0, 1])
    dfnointerac = pd.DataFrame({'NOME': lsnointer_period})
    dfnointerac.to_csv('./csv_producao/periodicos_nointer.csv',
                       index=False,
                       sep=',')

    # Only pairs with at least one shared paper become edges.
    edges = [(source, target, weight)
             for source, target, weight
             in zip(lsid, lsid_tocompare, lsinter_qtd)
             if weight != 0]

    # ------------------------------------------------------------
    # Graph
    plt.figure(figsize=(12, 9.5))
    G = nx.Graph()
    for source, target, weight in edges:
        # Each edge carries its own count. The graph is undirected, so
        # when both (a, b) and (b, a) are present the later one wins.
        G.add_edge(source, target, weight=weight)
    pos = nx.spring_layout(G, k=1.75)

    # One colour per group; the palette is cycled if there are more
    # groups than colours.
    colours = [
        '#5a7d9a', 'red', 'green', 'yellow', 'gray', 'orange', 'blue',
        'magenta', '#00555a', '#f7d560', 'cyan', '#b6b129', '#a1dd72',
        '#d49acb', '#d4a69a', '#977e93', '#a3cc72', '#c60acb', '#d4b22a',
        '#255e53', '#77525a', '#c7d511', '#c4c22b', '#c9b329', '#c8dd22',
        '#f75acb', '#b1a40a', '#216693', '#b1cd32', '#b33acb', '#c9a32b',
        '#925e11', '#c5dd39', '#d04205', '#d8a82a', '#373e29'
    ]
    dic_colours = {
        group: colours[i % len(colours)]
        for i, group in enumerate(df_idlist['GROUP'].unique())
    }

    # Colour and label of every node, both looked up in the researcher
    # list (ID in column 0, label in column 1, group in column 2).
    node_colours = []
    diclabel = {}
    for node in G.nodes():
        row = df_idlist[df_idlist['ID_LATTES'] == node]
        node_colours.append(dic_colours[row.iloc[0, 2]])
        diclabel[str(row.iloc[0, 0])] = row.iloc[0, 1]

    # nodes
    nx.draw_networkx_nodes(G,
                           pos,
                           node_size=400,
                           node_shape='o',
                           node_color=node_colours,
                           alpha=0.7)
    # edges
    nx.draw_networkx_edges(G, pos, width=1, edge_color='orange')
    # labels
    nx.draw_networkx_labels(G,
                            pos,
                            labels=diclabel,
                            font_size=16,
                            font_family='sans-serif')
    plt.axis('off')
    plt.tight_layout()
    plt.savefig('./relatorio/figures/grapho.png')
Example #3
0
def capes_indprodart():
    """Compute the IndProdArt index per quadrennium and save it as CSV.

    Reads the researcher list (``readIdList()``) and
    ``./csv_producao/periodicos_uniq.csv``. Papers whose QUALIS is
    ``"XX"`` or ``"C "`` are discarded. For each quadrennium starting in
    2013, 2017, ..., 2033 that has papers:

      1. papers are counted per (YEAR, QUALIS) and multiplied by the
         weight returned by ``fun_indprodart_classif``;
      2. for every year in which B4 + B5 account for more than 20% of the
         weighted total, the B4 and B5 rows of that year are dropped;
      3. the yearly weighted total is divided by the number of
         researchers and averaged over the years present.

    The result is written to
    ``./csv_producao/capesindex_indprodart.csv`` with columns
    QUADRIENIO_INI, QUADRIENIO_FIM and INDPRODART.
    """
    # Number of researchers (denominator of the index).
    num_dp = len(readIdList())

    # Papers, without the entries that have no usable QUALIS.
    df = pd.read_csv('./csv_producao/periodicos_uniq.csv',
                     header=0, sep=',')
    num_period_tot = len(df)
    df = df.query('QUALIS != "XX" and QUALIS != "C "').copy()
    df['YEAR'] = df['YEAR'].apply(iint)
    num_period_semqualis = num_period_tot - len(df)
    print('Numero de periodicos sem QUALIS = ', num_period_semqualis)

    # First year of each of the six quadrennia: 2013, 2017, ..., 2033.
    ls_quadri = list(range(2013, 2013 + 6 * 4, 4))

    # Paper count per year and QUALIS; it does not depend on the
    # quadrennium, so it is computed only once.
    counts = df.groupby(['YEAR', 'QUALIS'])['TITLE'].count().reset_index()
    counts.columns = ['YEAR', 'QUALIS', 'AMOUNT']

    ls_yini_quad = []
    ls_yfin_quad = []
    ls_indprodart = []
    for yini in ls_quadri:
        yfin = yini + 3
        window = counts[(counts['YEAR'] >= yini)
                        & (counts['YEAR'] <= yfin)].copy()
        if window.empty:
            continue
        window['PESO'] = window['QUALIS'].apply(fun_indprodart_classif)
        window['PROD_AMOUPESO'] = window['AMOUNT'] * window['PESO']

        # B4 and B5 together must not exceed 20% of a year's total. The
        # shares are evaluated on the totals before any row is dropped.
        year_total = window.groupby('YEAR')['PROD_AMOUPESO'].sum()
        is_b4b5 = window['QUALIS'].isin(['B4', 'B5'])
        b4b5_total = window[is_b4b5].groupby('YEAR')['PROD_AMOUPESO'].sum()
        dropped_years = []
        for year_b4b5, grade_b4b5 in b4b5_total.items():
            if grade_b4b5 / year_total[year_b4b5] > 0.2:
                print('Para o ano ', str(year_b4b5),
                      'artigos B4 B5 glosados, maior que 0.2')
                dropped_years.append(year_b4b5)
        # Drop only the B4/B5 rows of the flagged years; every other row
        # (other strata of that year, B4/B5 of other years) is kept.
        window = window[~(is_b4b5 & window['YEAR'].isin(dropped_years))]

        yearly_index = window.groupby('YEAR')['PROD_AMOUPESO'].sum() / num_dp
        ls_indprodart.append(yearly_index.mean())
        ls_yini_quad.append(yini)
        ls_yfin_quad.append(yfin)

    df_indprodart = pd.DataFrame({'QUADRIENIO_INI': ls_yini_quad,
                                  'QUADRIENIO_FIM': ls_yfin_quad,
                                  'INDPRODART': ls_indprodart})
    pathfilename = './csv_producao/capesindex_indprodart.csv'
    df_indprodart.to_csv(pathfilename, index=False)
    print(pathfilename, ' gravado com', len(df_indprodart), ' quadrienios')
Example #4
0
def capes_distindproddp():
    """Compute the DistIndProdDP index per quadrennium and save it as CSV.

    Reads the researcher list (``readIdList()``) and
    ``./csv_producao/periodicos_uniq.csv``. Papers whose QUALIS is
    ``"XX"`` or ``"C "`` are discarded. For each quadrennium starting in
    2013, 2017, ..., 2033 that has papers, a per-researcher index is
    computed from the paper counts weighted by
    ``fun_indprodart_classif`` and classified with
    ``fun_indori_classif``.

    Two files are written:
      * ``./csv_producao/capesindex_distindproddp_doce.csv`` -- one row
        per quadrennium and researcher (QUADRIENIO, FULL_NAME,
        INDPRODART, CLASSIF);
      * ``./csv_producao/capesindex_distindproddp.csv`` -- per
        quadrennium, the percentage of researchers (relative to the whole
        researcher list) whose class is neither "FRACO" nor "DEFICIENTE".
    """
    columns = ['QUADRIENIO', 'FULL_NAME', 'INDPRODART', 'CLASSIF']

    # Number of researchers (denominator of the indexes).
    num_dp = len(readIdList())

    # Papers, without the entries that have no usable QUALIS.
    df = pd.read_csv('./csv_producao/periodicos_uniq.csv',
                     header=0, sep=',')
    num_period_tot = len(df)
    df = df.query('QUALIS != "XX" and QUALIS != "C "').copy()
    df['YEAR'] = df['YEAR'].apply(iint)
    num_period_semqualis = num_period_tot - len(df)
    print('Numero de periodicos sem QUALIS = ', num_period_semqualis)

    # First year of each of the six quadrennia: 2013, 2017, ..., 2033.
    ls_quadri = list(range(2013, 2013 + 6 * 4, 4))

    # Paper count per year, researcher and QUALIS; it does not depend on
    # the quadrennium, so it is computed only once.
    counts = df.groupby(['YEAR', 'FULL_NAME',
                         'QUALIS'])['TITLE'].count().reset_index()
    counts.columns = ['YEAR', 'FULL_NAME', 'QUALIS', 'AMOUNT']

    frames = []
    for yini in ls_quadri:
        yfin = yini + 3
        window = counts[(counts['YEAR'] >= yini)
                        & (counts['YEAR'] <= yfin)].copy()
        if window.empty:
            continue
        window['PESO'] = window['QUALIS'].apply(fun_indprodart_classif)
        window['PROD_AMOUPESO'] = window['AMOUNT'] * window['PESO']

        # B4 and B5 together must not exceed 20% of a year's total (all
        # researchers summed). The shares are evaluated on the totals
        # before any row is dropped.
        year_total = window.groupby('YEAR')['PROD_AMOUPESO'].sum()
        is_b4b5 = window['QUALIS'].isin(['B4', 'B5'])
        b4b5_total = window[is_b4b5].groupby('YEAR')['PROD_AMOUPESO'].sum()
        dropped_years = []
        for year_b4b5, grade_b4b5 in b4b5_total.items():
            if grade_b4b5 / year_total[year_b4b5] > 0.2:
                print('Para o ano ', str(year_b4b5),
                      'artigos B4 B5 glosados, maior que 0.2')
                dropped_years.append(year_b4b5)
        # Drop only the B4/B5 rows of the flagged years; every other row
        # (other strata of that year, B4/B5 of other years) is kept.
        window = window[~(is_b4b5 & window['YEAR'].isin(dropped_years))]

        # Yearly score per researcher, then its mean over the years.
        # NOTE(review): the per-researcher score is divided by the total
        # number of researchers, and the class comes from
        # fun_indori_classif -- confirm both are intended for this index.
        per_year = (window.groupby(['YEAR', 'FULL_NAME'])['PROD_AMOUPESO']
                    .sum() / num_dp).reset_index()
        per_year.columns = ['YEAR', 'FULL_NAME', 'INDPRODART']
        per_researcher = per_year.groupby(
            ['FULL_NAME'])['INDPRODART'].mean().reset_index()
        per_researcher['CLASSIF'] = per_researcher['INDPRODART'].apply(
            fun_indori_classif)
        per_researcher['QUADRIENIO'] = str(yini) + '-' + str(yfin)
        frames.append(per_researcher[columns])

    # A single concat keeps the numeric dtypes; with no data at all the
    # result is an empty frame that still has the expected columns.
    if frames:
        df_indprodart_full = pd.concat(frames, axis=0)
    else:
        df_indprodart_full = pd.DataFrame(columns=columns)
    pathfilename = './csv_producao/capesindex_distindproddp_doce.csv'
    df_indprodart_full.to_csv(pathfilename, index=False)
    print(pathfilename, ' gravado com', len(
        df_indprodart_full), ' pesquisadores para todos os quadrienios')

    # Share of researchers per quadrennium and class, in percent.
    df_distindproddp = df_indprodart_full.groupby(['QUADRIENIO', 'CLASSIF'])[
        'FULL_NAME'].count().reset_index()
    df_distindproddp.columns = ['QUADRIENIO', 'CLASSIF', 'COUNT']
    df_distindproddp['DISTINDPRODDP'] = 100 * \
        df_distindproddp['COUNT'] / num_dp
    # Only the classes above "FRACO" and "DEFICIENTE" count.
    low_classes = df_distindproddp['CLASSIF'].isin(['FRACO', 'DEFICIENTE'])
    df_distindproddp = df_distindproddp[~low_classes]
    df_distindproddp = df_distindproddp.groupby(
        ['QUADRIENIO'])['DISTINDPRODDP'].sum().reset_index()
    pathfilename = './csv_producao/capesindex_distindproddp.csv'
    df_distindproddp.to_csv(pathfilename, index=False)
    print(pathfilename, ' gravado com', len(
        df_distindproddp), ' quadrienios')
Example #5
0
def capes_indautdis():
    """Compute the student authorship indexes per quadrennium.

    The graduate programme name is the text after the first ':' on the
    line at index 8 of ``./config.txt`` (upper-cased with
    ``fun_uppercase``). From ``./csv_producao/orientacoes_all.csv`` only
    master's dissertations and doctoral theses of that programme, where
    the researcher is not a co-advisor, are kept.

    For each quadrennium starting in 2013, 2017, ..., 2033 the students
    whose YEAR lies in ``[start - 1, end + 3]`` are taken, and for each
    one the rows of ``./csv_producao/periodicos_all.csv`` whose AUTHOR
    text contains both the student's and the advisor's last name are
    counted.

    Writes ``./csv_producao/capesindex_indautdis.csv`` with, per
    quadrennium:
      * INDOUT -- fraction of students with at least one such paper;
      * INDIS  -- mean number of such papers per student.
    """
    columns = ['QUADRIENIO', 'STUDENT', 'DOCENTE', 'TYPE', 'AMOUNT']

    # Programme name from the configuration file.
    with open('./config.txt', 'r', encoding='utf-8') as config_file:
        name_ppg = config_file.readlines()[8].split(':')[1]
    name_ppg = fun_uppercase(name_ppg.rstrip('\n').strip(' '))

    # Supervisions of the programme, main advisors only.
    df = pd.read_csv('./csv_producao/orientacoes_all.csv',
                     header=0, sep=',')
    df = df[df['NATURE'].isin(['Dissertação de mestrado',
                               'Tese de doutorado'])]
    df = df.query('TYPE != "CO_ORIENTADOR"').reset_index(drop=True)
    df['COURSE'] = df['COURSE'].apply(fun_uppercase)
    df['STUDENT'] = df['STUDENT'].apply(fun_uppercase)
    df['FULL_NAME'] = df['FULL_NAME'].apply(fun_uppercase)
    df = df.query('COURSE == @name_ppg')

    # First year of each of the six quadrennia: 2013, 2017, ..., 2033.
    ls_quadri = list(range(2013, 2013 + 6 * 4, 4))

    # Author texts of all papers; loaded on first use so the file is only
    # required when some quadrennium actually has students.
    paper_authors = None

    frames = []
    for yini in ls_quadri:
        yfin = yini + 3
        # Window from one year before the quadrennium starts up to three
        # years after it ends.
        df_disc_quadri = df[(df['YEAR'] >= yini - 1)
                            & (df['YEAR'] <= yfin + 3)]
        if df_disc_quadri.empty:
            print("sem orientacoes para o periodo")
            continue
        if paper_authors is None:
            df_period_all = pd.read_csv('./csv_producao/periodicos_all.csv',
                                        header=0, sep=',')
            paper_authors = list(
                df_period_all['AUTHOR'].apply(fun_uppercase))

        quadr = str(yini) + '-' + str(yfin)
        rows = []
        for student, advisor in zip(df_disc_quadri['STUDENT'],
                                    df_disc_quadri['FULL_NAME']):
            # Matching is a substring test on the last name of each.
            zdis = student.split(' ')[-1]
            zdoc = advisor.split(' ')[-1]
            period_count = sum(1 for zaut in paper_authors
                               if zdis in zaut and zdoc in zaut)
            rows.append((quadr, student, advisor, 'periodico',
                         period_count))
        frames.append(pd.DataFrame(rows, columns=columns))

    if frames:
        df_indautdisc_all = pd.concat(frames)
    else:
        df_indautdisc_all = pd.DataFrame(columns=columns)

    # Indexes per quadrennium.
    ls_indautdisc_quad = []
    ls_indautdisc = []
    ls_indis = []
    for q in df_indautdisc_all['QUADRIENIO'].unique():
        df_d = df_indautdisc_all.query('QUADRIENIO == @q')
        num_students = len(df_d)
        num_authors = num_students - len(df_d.query('AMOUNT == 0'))
        ls_indautdisc_quad.append(q)
        ls_indautdisc.append(num_authors / num_students)
        ls_indis.append(df_d['AMOUNT'].sum() / num_students)

    # NOTE(review): 'INDOUT' looks like a typo for 'INDAUT'; kept because
    # other code may read this column name -- confirm before renaming.
    df_indiscente = pd.DataFrame(list(zip(ls_indautdisc_quad,
                                          ls_indautdisc,
                                          ls_indis)),
                                 columns=['QUADRIENIO', 'INDOUT',
                                          'INDIS'])
    pathfilename = './csv_producao/capesindex_indautdis.csv'
    df_indiscente.to_csv(pathfilename, index=False)
    print(pathfilename, ' gravado com', len(df_indiscente), ' quadrienios')
Example #6
0
def capes_indori():
    """Compute the IndOri (supervision) index per quadrennium.

    The graduate programme name is the text after the first ':' on the
    line at index 8 of ``./config.txt`` (upper-cased with
    ``fun_uppercase``). From ``./csv_producao/orientacoes_all.csv`` only
    master's dissertations and doctoral theses of that programme, where
    the researcher is not a co-advisor, are kept.

    For each quadrennium starting in 2013, 2017, ..., 2033 that has
    supervisions, the students are counted per year and kind of work,
    weighted by ``fun_peso_defesa``, summed per year, divided by the
    number of researchers (``readIdList()``) and averaged over the years
    present. The mean is rounded to three decimals and classified with
    ``fun_indori_classif``.

    Writes ``./csv_producao/capesindex_indori.csv`` with columns
    QUADRIENIO_INI, QUADRIENIO_FIM, INDORI and INDORI_CLASSIFICACAO.
    """
    # Programme name from the configuration file.
    with open('./config.txt', 'r', encoding='utf-8') as config_file:
        name_ppg = config_file.readlines()[8].split(':')[1]
    name_ppg = fun_uppercase(name_ppg.rstrip('\n').strip(' '))

    # Number of researchers (denominator of the index).
    num_dp = len(readIdList())

    # Supervisions of the programme, main advisors only.
    df = pd.read_csv('./csv_producao/orientacoes_all.csv',
                     header=0, sep=',')
    df = df[df['NATURE'].isin(['Dissertação de mestrado',
                               'Tese de doutorado'])]
    df = df.query('TYPE != "CO_ORIENTADOR"').reset_index(drop=True)
    df['COURSE'] = df['COURSE'].apply(fun_uppercase)
    df = df.query('COURSE == @name_ppg')

    # First year of each of the six quadrennia: 2013, 2017, ..., 2033.
    ls_quadri = list(range(2013, 2013 + 6 * 4, 4))

    # Students per year and kind of work; it does not depend on the
    # quadrennium, so it is computed only once. After the count, the
    # STUDENT column holds the number of students.
    counts = df.groupby(['YEAR', 'NATURE'])['STUDENT'].count().reset_index()

    ls_yini_quad = []
    ls_yfin_quad = []
    ls_indori = []
    for yini in ls_quadri:
        yfin = yini + 3
        window = counts[(counts['YEAR'] >= yini)
                        & (counts['YEAR'] <= yfin)].copy()
        if window.empty:
            continue
        window['PESO_DEF'] = window['NATURE'].apply(fun_peso_defesa)
        window['PROD_STUPESO'] = window['STUDENT'] * window['PESO_DEF']
        yearly_index = window.groupby('YEAR')['PROD_STUPESO'].sum() / num_dp
        ls_yini_quad.append(yini)
        ls_yfin_quad.append(yfin)
        ls_indori.append(round(yearly_index.mean(), 3))

    df_indori = pd.DataFrame({'QUADRIENIO_INI': ls_yini_quad,
                              'QUADRIENIO_FIM': ls_yfin_quad,
                              'INDORI': ls_indori})
    df_indori['INDORI_CLASSIFICACAO'] = df_indori['INDORI'].apply(
        fun_indori_classif)
    pathfilename = './csv_producao/capesindex_indori.csv'
    df_indori.to_csv(pathfilename, index=False)
    print(pathfilename, ' gravado com', len(df_indori), ' quadrienios')