] list_sources = [list_header] with open(listNetAuthors, "rt") as textfile: author_lines = textfile.readlines() filtered_authors = [x.strip() for x in author_lines if x.strip()] processed_source = [] for dirpath, dirs, files in os.walk(dataFolder): for filename in files: if filename[-4:].upper() == '.CSV': csvFile = os.path.join(dirpath, filename) # For each CSV file print('Processing file ' + csvFile) table = csv_tools.read_csv_table(csvFile) headerTable = table[0] if 'Authors' in headerTable and 'Source title' in headerTable and 'Abbreviated Source Title' in headerTable and 'Publisher' in headerTable and 'Conference name' in headerTable: author_index = headerTable.index('Authors') sourceTitle_index = headerTable.index('Source title') abbr_sourceTitle_index = headerTable.index( 'Abbreviated Source Title') publisher_index = headerTable.index('Publisher') conference_index = headerTable.index('Conference name') access_index = [ sourceTitle_index, abbr_sourceTitle_index, publisher_index, conference_index ] for row in table[1:]: if not row[sourceTitle_index] in processed_source: authors_split = row[author_index].split(',')
exportFile = 'list_citations.csv' if len(sys.argv) > 3: listReferences = str(sys.argv[3]) else: listReferences = 'list_references.csv' #get_citations(dataFolder, exportFile, listReferences) list_header = [ 'ID', 'Number of citations in selected database', 'List ID of citations', 'Title', 'Authors', 'Source title', 'Abbreviated Source Title', 'Publisher', 'Conference name', 'Year', 'Link' ] list_citations = [list_header] refTable = csv_tools.read_csv_table(listReferences) # Note: header of refTable is ['ID', 'Number of references', 'Number of references outside selected database', 'Number of self-cited references', 'List ID of references', 'Title', 'Authors', 'Source title', 'Publisher', 'Year', 'Link'] number_of_citations = [0] * len(refTable) id_of_citing_papers = [ [] for _ in range(len(refTable)) ] # initialize array of empty lists, each row contains list of papers that cite this one # id_of_citing_papers = [[]]*len(refTable) # theoretically this command also creates an array of empty lists, however due to Python management the append/extend for each item affects all others for row in refTable[1:]: paper_id = str(row[0]) #print('Processing paper: ', paper_id) paper_ref = row[4].split(',') if not paper_ref == [ '' ]: # there are some references in the list of papers for ref in paper_ref:
exportFile = str(sys.argv[2]) else: exportFile = 'list_papers.csv' if len(sys.argv) > 3: listSource = str(sys.argv[3]) else: listSource = 'list_sources.csv' #get_papers(dataFolder, exportFile) list_header = [ 'ID', 'Title', 'Authors', 'Source title', 'Abbreviated Source Title', 'Publisher', 'Conference name', 'Year', 'Link', 'Rating' ] list_papers = [list_header] sourceTable = csv_tools.read_csv_table(listSource) # Note: header of sourceTable is ['ID', 'Source title', 'Abbreviated Source Title', 'Publisher', 'Conference name', 'Rating'] sourceTable_transpose = csv_tools.transpose_table(sourceTable) for dirpath, dirs, files in os.walk(dataFolder): for filename in files: if filename[-4:].upper() == '.CSV': csvFile = os.path.join(dirpath, filename) # For each CSV file print('Processing file ' + csvFile) table = csv_tools.read_csv_table(csvFile) headerTable = table[0] if 'Authors' in headerTable and 'Title' in headerTable and 'Year' in headerTable and 'Source title' in headerTable and 'DOI' in headerTable and 'Publisher' in headerTable: title_index = headerTable.index('Title') author_index = headerTable.index('Authors') sourceTitle_index = headerTable.index('Source title')
exportFile = 'list_references.csv' if len(sys.argv) > 3: listPaper = str(sys.argv[3]) else: listPaper = 'list_papers.csv' #get_papers(dataFolder, exportFile) list_header = [ 'ID', 'Number of references', 'Number of references outside selected database', 'Number of self-cited references', 'List ID of references', 'Title', 'Authors', 'Source title', 'Publisher', 'Year', 'Link' ] list_references = [list_header] paperTable = csv_tools.read_csv_table(listPaper) # Note: header of paperTable is ['ID', 'Title', 'Authors', 'Source title', 'Publisher', 'Year', 'Link'] paperTable_transpose = csv_tools.transpose_table(paperTable) for k in range(1, len(paperTable_transpose[2])): authors_split = paperTable_transpose[2][k].split(',') paperTable_transpose[2][k] = [item.strip() for item in authors_split] for dirpath, dirs, files in os.walk(dataFolder): for filename in files: if filename[-4:].upper() == '.CSV': csvFile = os.path.join(dirpath, filename) # For each CSV file table = csv_tools.read_csv_table(csvFile) headerTable = table[0] if 'Authors' in headerTable and 'Title' in headerTable and 'References' in headerTable: title_index = headerTable.index('Title')
else: exportFile = 'list_sources_citations_net.csv' if len(sys.argv)>3: listNetAuthors = str(sys.argv[3]) else: listNetAuthors = 'input_net_authors.txt' #get_sources_citations_net(listCitations, exportFile, listNetAuthors) list_header = ['ID', 'Source title', 'Abbreviated Source Title', 'Publisher', 'Conference name', 'Rating'] list_sources = [list_header] with open(listNetAuthors, "rt") as textfile: author_lines = textfile.readlines() filtered_authors = [x.strip() for x in author_lines if x.strip()] citeTable = csv_tools.read_csv_table(listCitations) headerTable = citeTable[0] # Note that header of citation table is ['ID', 'Number of citations in selected database', 'List ID of citations', 'Title', 'Authors', 'Source title', 'Abbreviated Source Title', 'Publisher', 'Conference name', 'Year', 'Link'] processed_source = [] author_index = headerTable.index('Authors') sourceTitle_index = headerTable.index('Source title') abbr_sourceTitle_index = headerTable.index('Abbreviated Source Title') publisher_index = headerTable.index('Publisher') conference_index = headerTable.index('Conference name') access_index = [sourceTitle_index, abbr_sourceTitle_index, publisher_index, conference_index] for row in citeTable[1:]: # Collect journal name of authors