def main():
    """Compute the citation total of the current PoP export and extend the delta history.

    Steps, all driven by paths/labels obtained from ``func``:

    1. Read the PoPCites CSV, keep rows with ``Year > 2009``, lower-case the
       titles, drop rows with duplicate titles, sum the ``Cites`` column and
       write the de-duplicated rows to ``data/<quarters>/PoPCites clean.csv``.
    2. Read the previous period's ``citation_delta.csv`` (the period is
       derived from the date 180 days ago), look up the ``Cites`` value stored
       for the previous query quarter, and append a row for the current
       quarter unless one is already present.
    3. Write the resulting history to the current period's
       ``citation_delta.csv``.

    Raises:
        IndexError: if the previous history has no row for the previous
            query quarter.
    """
    print("Calculating relative data of PoPCites.csv...")

    last2quarters_hyphen = func.get_last_2_quarters(connector='-')
    # os.path.join instead of '\data\...' literals: those rely on invalid
    # escape sequences (\d, \P, \c) being passed through unchanged.
    data_root = os.path.join(os.path.abspath(''), 'data')

    # ---- total citations -------------------------------------------------
    df_pop = pd.read_csv(func.get_abspath_pop())
    # .copy() makes the filtered frame independent, so the column assignments
    # below do not operate on a view of df_pop (SettingWithCopyWarning).
    popcites = df_pop.loc[df_pop['Year'] > 2009,
                          ['Year', 'Authors', 'Title', 'Cites']].copy()
    popcites['Title'] = popcites['Title'].str.lower()
    popcites['Year'] = popcites['Year'].astype(int)
    popcites_unique = popcites.drop_duplicates(subset='Title')
    total_cites = popcites_unique['Cites'].sum()
    path_pop_new = os.path.join(data_root, str(last2quarters_hyphen),
                                'PoPCites clean.csv')
    popcites_unique.to_csv(path_pop_new, index=False, encoding='utf_8_sig')

    # ---- citation delta history -------------------------------------------
    halfyear_before_today = datetime.date.today() - datetime.timedelta(180)
    previous_quarters = func.get_last_2_quarters(date=halfyear_before_today,
                                                 connector='-')
    path_delta_pre = os.path.join(data_root, str(previous_quarters),
                                  'citation_delta.csv')
    path_delta = os.path.join(data_root, str(last2quarters_hyphen),
                              'citation_delta.csv')

    df_delta = pd.read_csv(path_delta_pre, encoding='utf-8-sig')
    df_delta = df_delta.replace(np.nan, 0).replace(np.inf, 0)

    querydate_quarter = func.get_last_quarter(connector='_')
    querydate_quarter_pre = func.get_last_quarter(halfyear_before_today,
                                                  connector='_')
    previous_rows = df_delta[df_delta['QueryDateQuarter'] == querydate_quarter_pre]
    total_cites_pre = previous_rows.iloc[0]['Cites']
    delta_cites = total_cites - int(total_cites_pre)

    df_newrow = pd.DataFrame(
        data=[[querydate_quarter, total_cites, delta_cites]],
        columns=['QueryDateQuarter', 'Cites', 'delta'])
    # Only add the current quarter once. pd.concat replaces DataFrame.append,
    # which no longer exists in pandas >= 2.0.
    already_recorded = df_delta['QueryDateQuarter'].isin([querydate_quarter]).any()
    if not already_recorded:
        df_delta = pd.concat([df_delta, df_newrow], ignore_index=True)
    df_delta.to_csv(path_delta, index=False, encoding='utf-8-sig')

    print("Successfully calculated data and saved under path: {}".format(path_delta))
Example #2
0
def main():
    """Clean the downloaded KIT-Open Excel reports of ISSD and of IISM/IM.

    For each institute the raw report path and the target path of the cleaned
    report are built from ``func`` helpers and handed to ``clean_report``.
    The target files are named ``report_<quarters>_clean.xlsx`` and
    ``report_im_<quarters>_clean.xlsx`` inside the last-quarter folder.
    """
    print('Cleaning Excel file from KIT Open...')

    # clean report of ISSD
    path_report_issd = func.get_abspath_report()
    path_report_clean_issd = func.get_abspath_folder_lastquarter() + 'report_{}_clean.xlsx'.format(
        func.get_last_2_quarters(connector=''))
    clean_report(path_report_issd, path_report_clean_issd)
    print('Successfully created cleaned report of ISSD under path: {}'.format(path_report_clean_issd))

    # clean report of IISM
    path_report_iism = func.get_abspath_folder_lastquarter() + 'report_im_{}.xlsx'.format(
        func.get_last_2_quarters())
    path_report_clean_iism = func.get_abspath_folder_lastquarter() + 'report_im_{}_clean.xlsx'.format(
        func.get_last_2_quarters(connector=''))
    clean_report(path_report_iism, path_report_clean_iism)
    # Report the IISM target path here (the ISSD path was printed by mistake).
    print('Successfully created cleaned report of IM under path: {}'.format(path_report_clean_iism))
Example #3
0
def main():
    """Run every stage of the reporting pipeline in its fixed order.

    The last-quarter folder is created when missing, then each stage module's
    ``main()`` is invoked. Before the citation stage the function blocks on
    user input until ``PoPCites.csv`` is present in the last-quarter folder.
    """
    # Make sure the output folder of the last quarter is available.
    quarter_dir = func.get_abspath_folder_lastquarter()
    print(quarter_dir)
    if not os.path.exists(quarter_dir):
        os.makedirs(quarter_dir)

    # Stage 1: build search strings, download the reports and clean them.
    for stage in (search_string, search_string_im, get_KITopen,
                  get_KITopen_im, clean_report):
        stage.main()

    # Stage 2: predict missing tags.
    predict_tag.main()

    # Stage 3: citation statistics; PoPCites.csv has to be supplied manually,
    # so keep asking until the file shows up.
    while not os.path.exists(func.get_abspath_folder_lastquarter() + "PoPCites.csv"):
        prompt = "Please add PoPCites.csv of {} under the path: {} and then enter any values to continue".format(
            func.get_last_2_quarters(),
            func.get_abspath_folder_lastquarter())
        input(prompt)
    calc_cites_delta.main()

    # Stage 4: author relationship data, network and wordclouds.
    for stage in (get_data_network, get_network, get_wordcloud):
        stage.main()
def main():
    """Export paper titles to text files and render wordclouds from them.

    Produces, inside the last-quarter folder:

    * ``wordcloud_issd.png`` from all titles of the cleaned ISSD report,
    * ``wordcloud_issd_<year>.png`` for each of the four years before the
      current one (rows selected by the ``Erscheinungsjahr`` column),
    * ``wordcloud_im.png`` from the ``Publikationen`` sheet of the IM report.

    The intermediate title files are written to the ``report title``
    sub-folder, which is created if necessary.
    """
    print("Generating wordclouds of paper titles...")

    folder_lastqtr = func.get_abspath_folder_lastquarter()
    quarters = func.get_last_2_quarters()

    # check folder exists
    path_report_title = folder_lastqtr + 'report title'
    os.makedirs(path_report_title, exist_ok=True)

    # ---- ISSD: all titles -------------------------------------------------
    # os.path.join copes with the folder path with or without a trailing
    # separator.
    path_report_issd = os.path.join(folder_lastqtr,
                                    'report_{}_clean.xlsx'.format(quarters))
    df_report_issd = pd.read_excel(path_report_issd)
    path_title_issd = os.path.join(path_report_title,
                                   'report_title_issd_{}.txt'.format(quarters))
    df_report_issd['Titel'].to_csv(path_title_issd, index=False)
    path_wordcloud_issd = folder_lastqtr + 'wordcloud_issd.png'

    get_wordcloud(path_title_issd, path_wordcloud_issd)

    # ---- ISSD: one wordcloud per year for the last four years ---------------
    # NOTE(review): an "everything up to and including the fifth year back"
    # bucket may have been intended as well; only the four single-year files
    # have ever been generated, so only those are produced here.
    lastyr = datetime.datetime.now().year - 1
    for year in range(lastyr, lastyr - 4, -1):
        df_title_year = df_report_issd.loc[
            df_report_issd['Erscheinungsjahr'] == year, ['Titel']]
        path_title_year = os.path.join(
            path_report_title, 'report_title_issd_{}.txt'.format(year))
        df_title_year.to_csv(path_title_year, index=False)
        path_wordcloud_year = folder_lastqtr + 'wordcloud_issd_{}.png'.format(year)

        get_wordcloud(path_title_year, path_wordcloud_year,
                      width_value=1000, height_value=600)

    # ---- IM: all titles ---------------------------------------------------
    path_report_im = folder_lastqtr + 'report_im_{}.xlsx'.format(quarters)
    df_report_im = pd.read_excel(path_report_im, sheet_name='Publikationen')
    path_title_im = os.path.join(path_report_title,
                                 'report_title_im_{}.txt'.format(quarters))
    df_report_im['Titel'].to_csv(path_title_im, index=False)
    path_wordcloud_im = folder_lastqtr + 'wordcloud_im.png'

    get_wordcloud(path_title_im, path_wordcloud_im)

    print("Successfully generated wordclous of paper titles")
def main():
    """Download the IM publication report from KIT-Open as an Excel file.

    The author search string is read from
    ``researchers list/search string_im <quarters>.txt`` (relative to the
    working directory), converted with ``func.replace_name_url`` and inserted
    into the KIT-Open report URL. The response body is stored as
    ``report_im_<quarters>.xlsx`` in the last-quarter folder.

    Raises:
        requests.HTTPError: if the server answers with an error status, so an
            error page is never saved as if it were the report.
    """
    print("Getting data of IM from KIT-Open...")

    path_search_str = os.path.join(
        os.path.abspath(''), 'researchers list',
        'search string_im {}.txt'.format(
            func.get_last_2_quarters(connector='-')))
    with open(path_search_str, 'r', encoding='utf-8') as f:
        search_str = f.read()
    url_author_str = func.replace_name_url(search_str)
    url = ('https://publikationen.bibliothek.kit.edu/auswertungen/report.php?external_publications=all&open_access_availability=do_not_care&full_text=do_not_care&key_figures=number_of_publications&year=2010-&consider_online_advance_publication_date=true&consider_additional_pof_structures=false&row=type&column=year&authors='
           + url_author_str
           + '&in_opac=false&format=excel&publications=true')

    # A timeout keeps the pipeline from hanging forever on a dead connection;
    # it is generous because the report is generated on request.
    r = requests.get(url, allow_redirects=True, timeout=300)
    r.raise_for_status()

    path_report = func.get_abspath_folder_lastquarter(
    ) + 'report_im_{}.xlsx'.format(func.get_last_2_quarters())
    with open(path_report, 'wb') as f:
        f.write(r.content)

    print(
        "Successfully saved data of IM into Excel file under path: {}".format(
            path_report))
Example #6
0
def main():
    """Build the publication-by-researcher indicator matrix and save it as Excel.

    Each row corresponds to one entry of the ``Autor`` column of the cleaned
    report, each column to one researcher written as ``"<Last name>, <First
    name>"``. A cell is 1 when the researcher's name occurs as a substring of
    the (character-replaced) author string, otherwise 0. The matrix is written
    to ``data/<quarters>/data_network_<quarters>.xlsx`` below the working
    directory.
    """
    print("Getting data of authors...")

    # get namelist of authors
    namelist = pd.read_excel(func.get_abspath_researcher())
    # format example: "Feine, Jasper"
    fullname_list = namelist['Last name'] + ', ' + namelist['First name']

    # read report and get author list
    path_report = func.get_abspath_folder_lastquarter() + \
        'report_{}_clean.xlsx'.format(func.get_last_2_quarters(connector=''))
    df_report = pd.read_excel(path_report)
    df_author_eng = func.replace_ger_char(df_report['Autor'])

    # get matrix (builtin int: the np.int alias was removed in NumPy 1.24)
    x = np.zeros(shape=(len(df_author_eng), len(fullname_list)),
                 dtype=int,
                 order='C')
    for i, authors in enumerate(df_author_eng):
        if not isinstance(authors, str):
            # empty author cell (NaN): nothing can match, row stays all zero
            continue
        for h, fullname in enumerate(fullname_list):
            if isinstance(fullname, str) and fullname in authors:
                x[i, h] = 1

    # merge result and generate .xlsx
    df_result = pd.DataFrame(data=x, columns=fullname_list)
    path_data_network = os.path.join(
        os.path.abspath(''), 'data',
        str(func.get_last_2_quarters(connector='-')),
        'data_network_{}.xlsx'.format(func.get_last_2_quarters()))
    df_result.to_excel(path_data_network, index=False)

    print("Successfully saved author data under path: {}".format(
        path_data_network))
def main():
    """Scrape the IM team page and write the author search string to a text file.

    Names are taken from the ``collapseTable`` tables whose caption contains
    one of the selected group headings. Every name is used in its
    character-replaced form (``func.replace_ger_char``); names whose original
    spelling is not already contained in that list are added in the original
    spelling too. Entries are joined with ``' OR '``; an entry containing a
    space in the first or last name is wrapped in double quotes. The result is
    saved to ``researchers list/search string_im <quarters>.txt`` below the
    working directory.
    """
    print("Getting search string of IM ...")

    # get team list
    url_team = "https://im.iism.kit.edu/team.php"
    html_team = func.get_html(url_team)
    soup_team = BeautifulSoup(html_team, "html.parser")

    # only these groups are considered
    wanted_captions = (
        "Leitung",
        "Forschungsgruppenleitung",
        "Wissenschaftliche Mitarbeiter*Innen am KIT und FZI",
        "Junior Researchers",
    )
    last_name = []
    first_name = []
    for table in soup_team.find_all('table', class_="collapseTable"):
        caption = table.find('caption', align='top')
        if caption is None:
            # table without a caption cannot be assigned to a group
            continue
        caption_text = str(caption.text)
        if not any(key in caption_text for key in wanted_captions):
            continue
        for tr in table.find_all('tr'):
            for a in tr.find_all('a', itemprop='name'):
                parts = str(a.text).split(", ")
                if len(parts) < 2:
                    # not in "<last>, <first>" form; skip instead of crashing
                    continue
                last_name.append(parts[0])
                first_name.append(parts[1])

    df_team_de = pd.DataFrame({'Last name': last_name, 'First name': first_name})
    df_team = func.replace_ger_char(df_team_de.copy())

    # Add the original spelling of every member whose last or first name does
    # not occur in the replaced list. pd.concat replaces DataFrame.append,
    # which no longer exists in pandas >= 2.0.
    last_known = df_team_de['Last name'].isin(df_team['Last name'])
    first_known = df_team_de['First name'].isin(df_team['First name'])
    df_original_spelling = df_team_de[~(last_known & first_known)]
    df_search = pd.concat([df_team, df_original_spelling], ignore_index=True)

    # generate search string and save it to txt-file
    terms = []
    for last, first in zip(df_search['Last name'], df_search['First name']):
        term = last + ', ' + first
        if ' ' in str(last) or ' ' in str(first):
            term = '"' + term + '"'
        terms.append(term)
    search_str = ' OR '.join(terms)

    path_search_str = os.path.join(
        os.path.abspath(''), 'researchers list',
        'search string_im {}.txt'.format(func.get_last_2_quarters(connector='-')))
    with open(path_search_str, "w", encoding='utf-8') as f:
        f.write(search_str)

    print("Successfully created file of search string under path: {}".format(path_search_str))
def main():
    """Predict the three most likely KIT tags for report rows without a tag.

    Rows of the cleaned report whose ``KIT-Tagging`` cell is empty are
    encoded as a 0/1 matrix: one column per entry of the ``Input`` column
    (sheet ``top125`` of ``ML input.xlsx``), set to 1 when that entry occurs
    as a substring of the row's ``Quelle``. The stored Keras model scores
    each row; the three highest-scoring positions are mapped to the
    ``kit_tag`` column of the master data and saved next to the ``Quelle``
    values as ``tag_prediction_<quarters>.xlsx`` in the last-quarter folder.
    Nothing is written when no tag is missing.
    """
    print("Predicting blank KIT-tags in the report...")

    path_report_clean = func.get_abspath_folder_lastquarter(
    ) + 'report_{}_clean.xlsx'.format(func.get_last_2_quarters(connector=''))
    df_report = pd.read_excel(path_report_clean)
    df_blank = df_report.loc[df_report['KIT-Tagging'].isnull(),
                             ['Publikationstyp', 'Autor', 'Quelle']]
    df_blank = df_blank.reset_index(drop=True)

    if len(df_blank) == 0:
        print("There aren't any blank KIT-tags in the report")
        return

    quelle = df_blank['Quelle']

    path_tag = 'MasterData/master_data_ranking_2020.xlsx'
    publication = pd.read_excel(path_tag, sheet_name='Publications')
    kit_tag = publication['kit_tag']

    path_input = 'ML-model training/ML input.xlsx'
    df_input = pd.read_excel(
        path_input,
        sheet_name='top125')['Input']  # change version of input list

    # **** quelle ****
    # builtin int: the np.int alias was removed in NumPy 1.24
    x = np.zeros(shape=(len(quelle), len(df_input)), dtype=int, order='C')
    for i, source in enumerate(quelle):
        if not isinstance(source, str):
            # empty source cell (NaN): leave the feature row all zero
            continue
        for h, keyword in enumerate(df_input):
            if source.find(keyword) != -1:
                x[i, h] = 1

    # ML prediction
    path_model = 'ML-model training/tag_model.h5'
    model = load_model(path_model)
    x_t = model.predict(x)

    top_k = 3
    rows = []
    for scores in x_t:
        # positions of the top_k highest scores, best first
        top_k_idx = scores.argsort()[-top_k:][::-1]
        rows.append([kit_tag.iloc[idx] for idx in top_k_idx])
    # Built in one go; DataFrame.append no longer exists in pandas >= 2.0.
    df_top3 = pd.DataFrame(rows, columns=['No.1', 'No.2', 'No.3'])

    # save result
    path_predict = func.get_abspath_folder_lastquarter(
    ) + 'tag_prediction_{}.xlsx'.format(func.get_last_2_quarters())
    df_top3 = pd.concat([df_blank['Quelle'], df_top3], axis=1)
    df_top3.to_excel(path_predict, index=False)

    print(df_top3)
    print("Successfully saved predictions under path: {}".format(
        path_predict))
def main():
    """Update the ISSD researcher list and write the author search string.

    1. Scrape the ISSD team page; names come from the ``collapseTable``
       tables whose caption contains one of the selected group headings.
    2. Combine the character-replaced current team
       (``func.replace_ger_char``) with the previous period's researcher list
       and the ``HoF`` sheet of ``Hall of Fame.xlsx``, drop duplicate rows
       and save the result as ``researchers list <quarters>.xlsx``.
    3. Add the original spelling of current members whose last or first name
       is not contained in that list, join all entries with ``' OR '``
       (entries containing a space in a name part are double-quoted) and save
       the string as ``search string <quarters>.txt``.

    All files live in ``researchers list`` below the working directory.
    """
    print("Getting search string of ISSD ...")

    list_dir = os.path.join(os.path.abspath(''), 'researchers list')

    # get latest team list
    url_team = "https://issd.iism.kit.edu/team.php"
    html_team = func.get_html(url_team)
    soup_team = BeautifulSoup(html_team, "html.parser")

    # select only prof., postdocs, doctoral researcher and junior research
    wanted_captions = (
        "Professor and Chairperson",
        "PostDocs",
        "Doctoral Researchers",
        "Junior Researchers",
    )
    last_name = []
    first_name = []
    for table in soup_team.find_all('table', class_="collapseTable"):
        caption = table.find('caption', align='top')
        if caption is None:
            # table without a caption cannot be assigned to a group
            continue
        caption_text = str(caption.text)
        if not any(key in caption_text for key in wanted_captions):
            continue
        for tr in table.find_all('tr'):
            for a in tr.find_all('a', itemprop='name'):
                parts = str(a.text).split(", ")
                if len(parts) < 2:
                    # not in "<last>, <first>" form; skip instead of crashing
                    continue
                last_name.append(parts[0])
                first_name.append(parts[1])

    df_team_de = pd.DataFrame({'Last name': last_name, 'First name': first_name})
    df_team_now = func.replace_ger_char(df_team_de.copy())

    # add team members who have already left (based on previous list)
    previous_quarters = func.get_last_2_quarters(
        date=datetime.date.today() - datetime.timedelta(180), connector='-')
    path_team_previous = os.path.join(
        list_dir, 'researchers list {}.xlsx'.format(previous_quarters))
    df_team_previous = pd.read_excel(path_team_previous)
    df_team = pd.concat([df_team_now, df_team_previous],
                        verify_integrity=True, ignore_index=True)

    # get HoF list
    path_hof = os.path.join(list_dir, 'Hall of Fame.xlsx')
    df_hof = pd.read_excel(path_hof, sheet_name='HoF')

    # concat team list and HoF list and remove duplicates
    df_total = pd.concat([df_team, df_hof],
                         verify_integrity=True, ignore_index=True)
    df_total = df_total.drop_duplicates()

    # save list and search string
    last2quarter_hyphen = func.get_last_2_quarters(connector='-')
    file_name = 'researchers list {}.xlsx'.format(last2quarter_hyphen)
    path_list = os.path.join(list_dir, file_name)
    df_total.to_excel(excel_writer=path_list, index=False)

    # Add the original (German) spelling of current members where it is not
    # already covered. pd.concat replaces DataFrame.append (gone in pandas
    # >= 2.0); ignore_index=True also renumbers the rows, which
    # drop_duplicates() above may have left with gaps.
    last_known = df_team_de['Last name'].isin(df_total['Last name'])
    first_known = df_team_de['First name'].isin(df_total['First name'])
    df_original_spelling = df_team_de[~(last_known & first_known)]
    df_search = pd.concat([df_total, df_original_spelling], ignore_index=True)

    # generate search string and save it to txt-file
    terms = []
    for last, first in zip(df_search['Last name'], df_search['First name']):
        term = last + ', ' + first
        if ' ' in str(last) or ' ' in str(first):
            term = '"' + term + '"'
        terms.append(term)
    search_str = ' OR '.join(terms)

    path_search_str = os.path.join(
        list_dir, 'search string {}.txt'.format(last2quarter_hyphen))
    with open(path_search_str, "w", encoding='utf-8') as f:
        f.write(search_str)

    print("Successfully created file of search string under path: {}".format(path_search_str))
Example #10
0
def main():
    """Train the tag classifier on the labelled training set and save it.

    Features: one 0/1 column per entry of the ``Input`` column (sheet
    ``top125`` of ``ML input.xlsx``), set when that entry occurs as a
    substring of the training row's ``Quelle``.
    Targets: one-hot encoding of ``tag_pub`` over the ``kit_tag`` column of
    the master data. Training rows whose ``tag_pub`` matches no ``kit_tag``
    are left out of the fit.
    The fitted Keras model is written to ``tag_model.h5`` in the working
    directory.
    """
    print("Training machine learning model...")

    path_training = 'training set.xlsx'
    df_training = pd.read_excel(path_training, sheet_name='training')
    quelle = df_training['Quelle']
    classes = df_training['tag_pub']

    path_input = 'ML input.xlsx'
    df_input = pd.read_excel(path_input, sheet_name='top125')['Input']

    # **** quelle ****
    # builtin int: the np.int alias was removed in NumPy 1.24
    x = np.zeros(shape=(len(quelle), len(df_input)), dtype=int, order='C')
    for i, source in enumerate(quelle):
        if not isinstance(source, str):
            # empty source cell (NaN): leave the feature row all zero
            continue
        for h, keyword in enumerate(df_input):
            if source.find(keyword) != -1:
                x[i, h] = 1

    tag_file = os.path.join(
        os.path.abspath(os.path.dirname(os.path.dirname(__file__))),
        'MasterData', 'master_data_ranking_2020.xlsx')
    publication = pd.read_excel(tag_file, sheet_name='Publications')
    kit_tag = publication['kit_tag']

    # One-hot targets: at most one '1' per row, at the position of the first
    # matching kit_tag. Rows without any match stay unlabelled.
    y = np.zeros(shape=(len(classes), len(kit_tag)), dtype=int, order='C')
    labelled = np.zeros(len(classes), dtype=bool)
    for i, label in enumerate(classes):
        for j, tag in enumerate(kit_tag):
            if label == tag:
                y[i, j] = 1
                labelled[i] = True
                break

    # Only labelled rows take part in training.
    x_train = x[labelled]
    y_train = y[labelled]

    input_sh = x.shape[1]
    output_len = len(kit_tag)
    model = Sequential()
    model.add(Dense(128, input_shape=(input_sh,), activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(output_len, activation='softmax'))

    opt = keras.optimizers.Adam(learning_rate=0.01)
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    model.fit(x_train, y_train, epochs=200, batch_size=6)
    # evaluated on the training data itself; the result is not kept
    model.evaluate(x_train, y_train)
    save_dir = 'tag_model.h5'
    model.save(save_dir)

    print("Successfully saved model under the same folder")
def main():
    """List the most frequent words of the ``Quelle`` column of the latest report.

    All ``Quelle`` values of the cleaned report are merged into one text,
    every character other than ``a-z``/``A-Z`` is replaced by a space, the
    text is tokenised with NLTK, English stop words are removed and the 200
    most common remaining tokens are written to
    ``freq_words_top<N>.xlsx`` in the working directory (``N`` is the number
    of rows actually produced).
    """
    print("Getting input value from latest report...")

    path_report = os.path.join(
        os.path.abspath(os.path.dirname(os.path.dirname(__file__))),
        'data',
        str(func.get_last_2_quarters(connector='-')),
        'report_{}_clean.xlsx'.format(func.get_last_2_quarters()))
    df_report = pd.read_excel(path_report)
    quelle = df_report['Quelle']

    # Merge all sources into one string. The separator keeps the last word of
    # one source from fusing with the first word of the next; empty cells
    # (NaN) are skipped because they are not strings.
    text_quelle = ' '.join(quelle.dropna().astype(str))

    # delete numbers and punctuation, keep letters only
    quelle_letters_only = re.sub("[^a-zA-Z]", " ", text_quelle)

    # tokenize and filter text
    # NOTE(review): the stop-word test is case-sensitive, so capitalised
    # stop words such as "The" are kept - confirm whether that is wanted.
    stop_words = set(stopwords.words('english'))
    tokens = nltk.word_tokenize(quelle_letters_only)
    filtered_text = [w for w in tokens if w not in stop_words]

    # top 200
    fdist = FreqDist(filtered_text)
    tops = fdist.most_common(200)
    df_tops = pd.DataFrame(tops)
    print(df_tops)
    df_tops.to_excel('freq_words_top{}.xlsx'.format(len(df_tops)))

    print("Successfully saved most frequent words into 'freq_words_top{}.xlsx' under this folder".format(len(df_tops)))
def main():
    """Render the co-authorship network of the researchers as an HTML page.

    Reads the 0/1 publication-by-researcher matrix
    (``data_network_<quarters>.xlsx``) and the researcher list, then builds
    an undirected graph:

    * a node per researcher with at least one row equal to 1 in their column
      (node attribute ``size`` = number of such rows),
    * an edge between two researchers for each pair that shares at least one
      row (edge attribute ``weight`` = number of shared rows).

    Node positions come from ``nx.spring_layout``. Every edge is drawn twice,
    once per direction, each copy labelled with the name of its end node and
    the weight; line width is the square root of the weight. The figure is
    written to ``network_author.html`` in the last-quarter folder.
    """
    print("Generating cooperation network of authors...")

    path_data_network = func.get_abspath_folder_lastquarter(
    ) + 'data_network_{}.xlsx'.format(func.get_last_2_quarters())
    df_result = pd.read_excel(path_data_network)

    # get namelist
    namelist = pd.read_excel(func.get_abspath_researcher())
    fullname_list = namelist['Last name'] + ', ' + namelist['First name']
    # The entry with index label 0 is left out of the network.
    # NOTE(review): presumably the head of the group, who would otherwise be
    # linked to almost everyone - confirm.
    fullname_list = fullname_list.drop([0])

    # rows (publications) in which each researcher is marked as author
    papers_by_author = {
        fullname: df_result.loc[df_result[fullname] == 1]
        for fullname in fullname_list
    }

    # network: nodes first, then edges
    nw_author = nx.Graph()
    for fullname, papers in papers_by_author.items():
        n_papers = papers[fullname].sum()
        if n_papers > 0:
            nw_author.add_node(fullname, size=n_papers)

    for fullname, papers in papers_by_author.items():
        for coauthor in papers_by_author:
            if coauthor == fullname:
                continue
            n_shared = papers[coauthor].sum()
            if n_shared > 0:
                nw_author.add_edge(fullname, coauthor, weight=n_shared)

    # get positions
    pos_ = nx.spring_layout(nw_author)

    # Make edge traces: one per direction, so that each copy can carry the
    # name of a different end node. All forward traces come before all
    # backward traces.
    forward_traces = []
    backward_traces = []
    for edge in nw_author.edges():
        node_a, node_b = edge
        weight = nw_author.edges()[edge]['weight']
        xa, ya = pos_[node_a]
        xb, yb = pos_[node_b]
        width = weight**0.5
        forward_traces.append(
            make_edge([xa, xb, None], [ya, yb, None],
                      node_b + ': ' + str(weight),
                      width=width))
        backward_traces.append(
            make_edge([xb, xa, None], [yb, ya, None],
                      node_a + ': ' + str(weight),
                      width=width))
    edge_trace = forward_traces + backward_traces

    # make node trace
    node_x = []
    node_y = []
    node_text = []
    for node in nw_author.nodes():
        x, y = pos_[node]
        node_x.append(x)
        node_y.append(y)
        node_text.append('<b>' + node + '</b>')
    node_trace = go.Scatter(x=node_x,
                            y=node_y,
                            text=node_text,
                            textposition="top center",
                            textfont_size=10,
                            mode='markers+text',
                            hoverinfo='none',
                            marker=dict(color=[color_nw] * len(node_x),
                                        size=[],
                                        line=None))

    # customize layout
    layout = go.Layout(paper_bgcolor='rgba(0,0,0,0)',
                       plot_bgcolor='rgba(0,0,0,0)',
                       hovermode='x')
    fig = go.Figure(layout=layout)
    for trace in edge_trace:
        fig.add_trace(trace)
    fig.add_trace(node_trace)
    fig.update_layout(showlegend=False)
    fig.update_xaxes(showticklabels=False)
    fig.update_yaxes(showticklabels=False)

    path_html = func.get_abspath_folder_lastquarter() + "network_author.html"
    fig.write_html(path_html)

    print("Successfully generated cooperation network under path: {}".format(
        path_html))