def export_words_to_csv(de_words, en_words, exportFile='result.csv'):
    table = []
    for k in range(len(de_words)):
        table.append([de_words[k], en_words[k]])
    csv_tools.write_table_csv(exportFile, table)
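# Example usage (a minimal sketch; the word lists are hypothetical and assumed to have
# equal length, so each CSV row holds one German/English pair):
#   de_words = ['Haus', 'Baum']
#   en_words = ['house', 'tree']
#   export_words_to_csv(de_words, en_words, 'vocabulary.csv')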
    'Link'
]
list_citations = [list_header]
refTable = csv_tools.read_csv_table(listReferences)
# Note: header of refTable is ['ID', 'Number of references',
# 'Number of references outside selected database', 'Number of self-cited references',
# 'List ID of references', 'Title', 'Authors', 'Source title', 'Publisher', 'Year', 'Link']
number_of_citations = [0] * len(refTable)
# Initialize a list of empty lists; each entry holds the IDs of the papers that cite that paper.
id_of_citing_papers = [[] for _ in range(len(refTable))]
# id_of_citing_papers = [[]] * len(refTable)  # this would also create a list of empty lists,
# but every entry would alias the same list object, so append/extend on one item would
# affect all the others (illustrated after this block)
for row in refTable[1:]:
    paper_id = str(row[0])
    #print('Processing paper: ', paper_id)
    paper_ref = row[4].split(',')
    if paper_ref != ['']:  # the paper has at least one reference inside the database
        for ref in paper_ref:
            number_of_citations[int(ref)] += 1
            id_of_citing_papers[int(ref)].append(paper_id)
for k in range(1, len(refTable)):
    list_citations.append(
        [k, number_of_citations[k], ','.join(id_of_citing_papers[k])] + refTable[k][5:])
csv_tools.write_table_csv(exportFile, list_citations)
#print(list_citations)
print('Analysis finished. Result is written to file ' + exportFile)
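# Illustration of the aliasing pitfall noted above (a standalone sketch, not part of the
# analysis): [[]] * n creates n references to one shared list, while a comprehension
# creates n independent lists.
#   shared = [[]] * 3
#   shared[0].append('1')            # shared   -> [['1'], ['1'], ['1']]
#   separate = [[] for _ in range(3)]
#   separate[0].append('1')          # separate -> [['1'], [], []]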
    sourceTitle_index, abbr_sourceTitle_index, publisher_index, conference_index
]
for row in table[1:]:
    if row[sourceTitle_index] not in processed_source:
        authors_split = row[author_index].split(',')
        list_authors = [author.strip() for author in authors_split]
        flag_author_exist = False
        for author in list_authors:
            if author in filtered_authors:
                flag_author_exist = True
        if flag_author_exist:
            list_sources.append([row[k] for k in access_index])
            processed_source.append(row[sourceTitle_index])

# This command should be executed after all the papers were collected
del list_sources[0]  # remove header line before sorting
list_sources.sort(key=lambda x: x[0],
                  reverse=False)  # sort by title, note that ID column is not added yet
list_sources.insert(0, list_header)
for k in range(1, len(list_sources)):
    list_sources[k].insert(0, k)  # add ID
    list_sources[k].append('')  # add blank column for Rating
csv_tools.write_table_csv(exportFile, list_sources)
#print(list_sources)
print('Analysis finished. Result is written to file ' + exportFile)
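# Possible optimization (a sketch, not used above): 'not in' on a plain list is a linear
# scan, so for a large database a set of processed source titles keeps the duplicate
# check constant-time. 'processed_source_set' is a hypothetical name.
#   processed_source_set = set()
#   for row in table[1:]:
#       if row[sourceTitle_index] not in processed_source_set:
#           ...
#           processed_source_set.add(row[sourceTitle_index])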
        paper_title = ref_titles[kk]
        paper_authors = ref_authors[kk]
        if paper_title not in paperTable_transpose[1]:
            no_ref_ext += 1
        else:  # reference is in the paper table
            ref_id_in_paperTable = paperTable_transpose[1].index(paper_title)
            list_ref_IDs.append(str(ref_id_in_paperTable))
            self_cite_flag = False
            for single_author in paper_authors:
                if single_author in paperTable_transpose[2][ref_id_in_paperTable]:
                    self_cite_flag = True
            if self_cite_flag:
                no_ref_self += 1
    list_ref_IDs_string = ','.join(list_ref_IDs)
    ref_detail_row = [
        id_in_paperTable, no_ref, no_ref_ext, no_ref_self, list_ref_IDs_string
    ] + paperTable[id_in_paperTable][1:]
    list_references.append(ref_detail_row)

# This command should be executed after all the references were collected
del list_references[0]  # remove header line before sorting
list_references.sort(key=lambda x: x[0], reverse=False)  # sort by ID
list_references.insert(0, list_header)
csv_tools.write_table_csv(exportFile, list_references)
#print(list_references)
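# For context (a sketch; the column positions are assumptions inferred from the lookups
# above): paperTable_transpose holds the paper table column-by-column, so
#   paperTable_transpose[1] -> list of all paper titles    (used to locate a reference)
#   paperTable_transpose[2] -> list of all author strings  (used for the self-citation check)
# A row-oriented table can be transposed with, e.g.:
#   paperTable_transpose = list(map(list, zip(*paperTable)))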
                    no_refs_to_net[filtered_authors.index(person)] += 1
                if paper_id not in temp_list_citations_from_net[filtered_authors.index(ref_author)]:
                    temp_list_citations_from_net[filtered_authors.index(ref_author)].append(paper_id)
                    no_citations_from_net[filtered_authors.index(ref_author)] += 1
                if (paper_id, ref_id) not in temp_list_ref_pairs_within_net[filtered_authors.index(person)]:
                    temp_list_ref_pairs_within_net[filtered_authors.index(person)].append((paper_id, ref_id))
                    no_times_references_to_net[filtered_authors.index(person)] += 1
                if (paper_id, ref_id) not in temp_list_cite_pairs_within_net[filtered_authors.index(ref_author)]:
                    temp_list_cite_pairs_within_net[filtered_authors.index(ref_author)].append((paper_id, ref_id))
                    no_times_citations_from_net[filtered_authors.index(ref_author)] += 1
                matrix_references[filtered_authors.index(person)][filtered_authors.index(ref_author)] += 1
                #matrix_citations[filtered_authors.index(ref_author)][filtered_authors.index(person)] += 1  # just the transpose of matrix_references

for k in range(len(filtered_authors)):
    list_stats.append([
        k + 1, filtered_authors[k], no_papers[k], no_papers_citing_net[k],
        no_papers_cited_by_net[k], no_refs_to_net[k], no_citations_from_net[k],
        no_total_refs[k], no_times_references_to_net[k],
        no_times_citations_from_net[k]
    ] + matrix_cite_with_rating[k] + matrix_references[k])

# Extend the header
for rating in rating_list:
    list_stats[0].append('Citation rating: ' + rating)
for author in filtered_authors:
    list_stats[0].append(author + ' (cited by author in row)')

csv_tools.write_table_csv(exportFile, list_stats)
#print(list_stats)
print('Analysis finished. Result is written to file ' + exportFile)
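# Possible simplification (a sketch, not used above): the updates above call
# filtered_authors.index(...) many times, and each call rescans the author list.
# Looking the two indices up once per (person, ref_author) pair avoids that;
# 'person_idx' and 'ref_author_idx' are hypothetical names.
#   person_idx = filtered_authors.index(person)
#   ref_author_idx = filtered_authors.index(ref_author)
#   matrix_references[person_idx][ref_author_idx] += 1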
conference_index = headerTable.index('Conference name')
link_index = headerTable.index('DOI')
year_index = headerTable.index('Year')
access_index = [
    title_index, author_index, sourceTitle_index, publisher_index,
    abbr_sourceTitle_index, conference_index, year_index, link_index
]
for row in table[1:]:
    if row[title_index] not in [item[0] for item in list_papers]:
        list_papers.append([row[k] for k in access_index])

# This command should be executed after all the papers were collected
del list_papers[0]  # remove header line before sorting
list_papers.sort(key=lambda x: x[0],
                 reverse=False)  # sort by title, note that ID column is not added yet
list_papers.insert(0, list_header)
for k in range(1, len(list_papers)):
    list_papers[k].insert(0, k)  # add ID
    if list_papers[k][3] in sourceTable_transpose[1]:  # paper inherits rating from source
        list_papers[k].append(
            sourceTable_transpose[5][sourceTable_transpose[1].index(list_papers[k][3])])
csv_tools.write_table_csv(exportFile, list_papers)
#print(list_papers)
print('Analysis finished. Result is written to file ' + exportFile)
for paper_affi in affi_data[1:]:
    list_authors = paper_affi.split(';')
    for author in list_authors:
        if author.strip() not in list_author_affi:  # compare both name and affiliation
            list_author_affi.append(author.strip())

# This command should be executed after all the author affiliations were collected
list_author_affi.sort()
list_author_combined, list_affi_combined = group_affi_by_author(list_author_affi)
authorTable = export_table_combined(list_author_combined, list_affi_combined)
list_header = ['Author', 'Number of affiliations', 'Affiliations']
authorTable.insert(0, list_header)
csv_tools.write_table_csv(exportFile, authorTable)


## Test with one file
#if __name__ == "__main__":
#    csvFile = 'TimonRabczuk_citation_by_authors.csv'
#    exportFile = 'list_all_authors.csv'
#    list_author_affi = []
#
#    # For each CSV file
#    table = csv_tools.read_csv_table(csvFile)
#    headerTable = table[0]
#    if 'Authors with affiliations' in headerTable:
#        affi_index = headerTable.index('Authors with affiliations')
#        affi_data = [row[affi_index] for row in table]
#        ##print('List of authors and affiliations: \n')
#        ##print(affi_data[1922])
#        ##test = parse_author_affi(affi_data[1922])
else:
    exportFile = 'all_database.csv'

#merge_scopus_data(dataFolder, exportFile)
merged_table = []
processed_papers = []
for dirpath, dirs, files in os.walk(dataFolder):
    for filename in files:
        if filename[-4:].upper() == '.CSV':
            csvFile = os.path.join(dirpath, filename)
            # For each CSV file
            print('Processing file ' + csvFile)
            table = csv_tools.read_csv_table(csvFile)
            headerTable = table[0]
            if 'Title' in headerTable:
                title_index = headerTable.index('Title')
                for row in table[1:]:
                    if row[title_index] not in processed_papers:
                        processed_papers.append(row[title_index])
                        merged_table.append(row)

# This command should be executed after all the papers were collected
merged_table.sort(
    key=lambda x: x[title_index],
    reverse=False)  # sort by title, assume all data files have the same header
merged_table.insert(0, headerTable)
csv_tools.write_table_csv(exportFile, merged_table)
#print(list_papers)
print('Merging finished. Result is written to file ' + exportFile)
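# Example layout (hypothetical; folder and file names are illustrative only): the walk
# above picks up every Scopus CSV export found anywhere under dataFolder, e.g.
#   dataFolder/
#       author_A.csv
#       conference_B/search_2019.csv
# and merges them into a single de-duplicated table written to all_database.csv.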