Example #1
import csv_tools  # helper module used throughout these examples for reading/writing CSV tables

def export_words_to_csv(de_words, en_words, exportFile='result.csv'):
    # Pair each German word with its English counterpart and write the pairs to a CSV file
    table = []
    for k in range(len(de_words)):
        table.append([de_words[k], en_words[k]])
    csv_tools.write_table_csv(exportFile, table)
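
A minimal usage sketch for the function above; the word lists are made-up illustration data, and csv_tools.write_table_csv is assumed to take a file name and a table (list of rows), as in the other examples on this page.

de_words = ['Haus', 'Baum', 'Wasser']     # hypothetical sample data
en_words = ['house', 'tree', 'water']
export_words_to_csv(de_words, en_words, exportFile='dictionary.csv')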
Example #2
        'Link'
    ]
    list_citations = [list_header]

    refTable = csv_tools.read_csv_table(listReferences)
    # Note: header of refTable is ['ID', 'Number of references', 'Number of references outside selected database', 'Number of self-cited references', 'List ID of references', 'Title', 'Authors', 'Source title', 'Publisher', 'Year', 'Link']
    number_of_citations = [0] * len(refTable)
    id_of_citing_papers = [
        [] for _ in range(len(refTable))
    ]  # initialize array of empty lists, each row contains list of papers that cite this one
    # id_of_citing_papers = [[]] * len(refTable) would look equivalent, but it creates one shared empty list referenced many times, so appending to any item would affect all of them
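    # Illustration of that pitfall (not part of the original function):
    #   rows = [[]] * 3
    #   rows[0].append(1)              # -> [[1], [1], [1]]  (all entries are the same list object)
    #   rows = [[] for _ in range(3)]
    #   rows[0].append(1)              # -> [[1], [], []]    (independent lists)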

    for row in refTable[1:]:
        paper_id = str(row[0])
        #print('Processing paper: ', paper_id)
        paper_ref = row[4].split(',')
        if paper_ref != ['']:  # the paper has at least one reference listed
            for ref in paper_ref:
                number_of_citations[int(ref)] += 1
                id_of_citing_papers[int(ref)].append(paper_id)
    for k in range(1, len(refTable)):
        list_citations.append(
            [k, number_of_citations[k], ','.join(id_of_citing_papers[k])] +
            refTable[k][5:])

    csv_tools.write_table_csv(exportFile, list_citations)
    #print(list_citations)
    print('Analysis finished. Result is written to file ' + exportFile)
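
csv_tools is a helper module local to the project these examples come from; its source is not shown here. A minimal sketch of the interface the examples rely on, assuming a table is simply a list of rows, could be written with the standard csv module:

import csv

def read_csv_table(csvFile):
    # Read a CSV file into a list of rows (each row is a list of strings)
    with open(csvFile, newline='', encoding='utf-8') as f:
        return [row for row in csv.reader(f)]

def write_table_csv(csvFile, table):
    # Write a list of rows out as a CSV file
    with open(csvFile, 'w', newline='', encoding='utf-8') as f:
        csv.writer(f).writerows(table)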
Example #3
                        sourceTitle_index, abbr_sourceTitle_index,
                        publisher_index, conference_index
                    ]
                    for row in table[1:]:
                        if row[sourceTitle_index] not in processed_source:
                            authors_split = row[author_index].split(',')
                            list_authors = [author.strip() for author in authors_split]
                            # keep this source only if at least one of its authors is in the filtered list
                            flag_author_exist = any(author in filtered_authors for author in list_authors)
                            if flag_author_exist:
                                list_sources.append([row[k] for k in access_index])
                                processed_source.append(row[sourceTitle_index])

    # This command should be executed after all the papers were collected
    del list_sources[0]  # remove header line before sorting
    list_sources.sort(key=lambda x: x[0])  # sort by title; the ID column has not been added yet
    list_sources.insert(0, list_header)
    for k in range(1, len(list_sources)):
        list_sources[k].insert(0, k)  # add ID
        list_sources[k].append('')  # add blank column for Rating
    csv_tools.write_table_csv(exportFile, list_sources)
    #print(list_sources)
    print('Analysis finished. Result is written to file ' + exportFile)
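
A note on the lookup above: processed_source is an ordinary list, so every membership test rescans it. The same dedup-and-filter step, sketched with a set and any() (variable names taken from the example; behavior is the same, just faster on large tables):

processed_source = set()
for row in table[1:]:
    if row[sourceTitle_index] not in processed_source:
        list_authors = [a.strip() for a in row[author_index].split(',')]
        if any(author in filtered_authors for author in list_authors):
            list_sources.append([row[k] for k in access_index])
            processed_source.add(row[sourceTitle_index])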
Example #4
                                    paper_title = ref_titles[kk]
                                    paper_authors = ref_authors[kk]
                                    if paper_title not in paperTable_transpose[1]:
                                        no_ref_ext += 1
                                    else:  # the reference is in the paper table
                                        ref_id_in_paperTable = paperTable_transpose[1].index(paper_title)
                                        list_ref_IDs.append(str(ref_id_in_paperTable))
                                        self_cite_flag = False
                                        for single_author in paper_authors:
                                            if single_author in paperTable_transpose[2][ref_id_in_paperTable]:
                                                self_cite_flag = True
                                        if self_cite_flag:
                                            no_ref_self += 1
                                list_ref_IDs_string = ','.join(list_ref_IDs)
                                ref_detail_row = [
                                    id_in_paperTable, no_ref, no_ref_ext,
                                    no_ref_self, list_ref_IDs_string
                                ] + paperTable[id_in_paperTable][1:]
                                list_references.append(ref_detail_row)

    # This command should be executed after all the references were collected
    del list_references[0]  # remove header line before sorting
    list_references.sort(key=lambda x: x[0], reverse=False)  # sort by ID
    list_references.insert(0, list_header)
    csv_tools.write_table_csv(exportFile, list_references)
    #print(list_references)
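
The fragment above resolves each reference title with paperTable_transpose[1].index(paper_title), rescanning the title column for every reference. An equivalent lookup table built once up front, as an optional optimization sketch (names follow the example; it assumes titles are unique in the table):

title_to_id = {title: idx for idx, title in enumerate(paperTable_transpose[1])}
# then, inside the loop over references:
#     if paper_title in title_to_id:
#         ref_id_in_paperTable = title_to_id[paper_title]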

Example #5

                                no_refs_to_net[filtered_authors.index(person)] += 1

                            if paper_id not in temp_list_citations_from_net[filtered_authors.index(ref_author)]:
                                temp_list_citations_from_net[filtered_authors.index(ref_author)].append(paper_id)
                                no_citations_from_net[filtered_authors.index(ref_author)] += 1
                            
                            if (paper_id, ref_id) not in temp_list_ref_pairs_within_net[filtered_authors.index(person)]:
                                temp_list_ref_pairs_within_net[filtered_authors.index(person)].append((paper_id, ref_id))
                                no_times_references_to_net[filtered_authors.index(person)] += 1
                            
                            if (paper_id, ref_id) not in temp_list_cite_pairs_within_net[filtered_authors.index(ref_author)]:
                                temp_list_cite_pairs_within_net[filtered_authors.index(ref_author)].append((paper_id, ref_id))
                                no_times_citations_from_net[filtered_authors.index(ref_author)] += 1
                            
                            matrix_references[filtered_authors.index(person)][filtered_authors.index(ref_author)] += 1
                            #matrix_citations[filtered_authors.index(ref_author)][filtered_authors.index(person)] += 1 # just the transpose of matrix_references
                            
    
    for k in range(len(filtered_authors)):
        list_stats.append(
            [k + 1, filtered_authors[k], no_papers[k], no_papers_citing_net[k],
             no_papers_cited_by_net[k], no_refs_to_net[k], no_citations_from_net[k],
             no_total_refs[k], no_times_references_to_net[k], no_times_citations_from_net[k]]
            + matrix_cite_with_rating[k] + matrix_references[k])
    
    # Extend the header
    for rating in rating_list:
        list_stats[0].append('Citation rating: ' + rating)
    for author in filtered_authors:
        list_stats[0].append(author + ' (cited by author in row)')
    
    csv_tools.write_table_csv(exportFile, list_stats)
    #print(list_stats)
    print('Analysis finished. Result is written to file ' + exportFile)
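
The per-author counters and matrices used in this fragment are initialized before the part shown here. A minimal sketch of how they might be set up, keeping the list-aliasing caveat from Example #2 in mind (names follow the fragment; the real initialization is not shown on this page):

n = len(filtered_authors)
no_refs_to_net = [0] * n                               # safe: integers are immutable
temp_list_citations_from_net = [[] for _ in range(n)]  # one independent list per author
temp_list_ref_pairs_within_net = [[] for _ in range(n)]
temp_list_cite_pairs_within_net = [[] for _ in range(n)]
matrix_references = [[0] * n for _ in range(n)]        # n x n matrix of counters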
    
Example #6

                    conference_index = headerTable.index('Conference name')
                    link_index = headerTable.index('DOI')
                    year_index = headerTable.index('Year')
                    access_index = [
                        title_index, author_index, sourceTitle_index,
                        publisher_index, abbr_sourceTitle_index,
                        conference_index, year_index, link_index
                    ]
                    for row in table[1:]:
                        if row[title_index] not in [item[0] for item in list_papers]:
                            list_papers.append([row[k] for k in access_index])

    # This command should be executed after all the papers were collected
    del list_papers[0]  # remove header line before sorting
    list_papers.sort(key=lambda x: x[0])  # sort by title; the ID column has not been added yet
    list_papers.insert(0, list_header)
    for k in range(1, len(list_papers)):
        list_papers[k].insert(0, k)  # add ID
        if list_papers[k][3] in sourceTable_transpose[1]:
            # paper inherits rating from source
            list_papers[k].append(
                sourceTable_transpose[5][sourceTable_transpose[1].index(
                    list_papers[k][3])])
    csv_tools.write_table_csv(exportFile, list_papers)
    #print(list_papers)
    print('Analysis finished. Result is written to file ' + exportFile)
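
One detail of the rating step above: when a paper's source title is not found in sourceTable_transpose[1], no rating column is appended, so those rows end up one column shorter than the others. The same step with an else branch that keeps every row the same width (an optional adjustment, not part of the original code):

        if list_papers[k][3] in sourceTable_transpose[1]:
            # paper inherits the rating of its source
            list_papers[k].append(
                sourceTable_transpose[5][sourceTable_transpose[1].index(list_papers[k][3])])
        else:
            list_papers[k].append('')  # no rating available for this source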

Example #7

                    for paper_affi in affi_data[1:]:
                        list_authors = paper_affi.split(';')
                        for author in list_authors:
                            if author.strip() not in list_author_affi:  # compare both the name and the affiliation
                                list_author_affi.append(author.strip())

    # This command should be executed after all the author affiliations were collected
    list_author_affi.sort()
    list_author_combined, list_affi_combined = group_affi_by_author(
        list_author_affi)
    authorTable = export_table_combined(list_author_combined,
                                        list_affi_combined)
    list_header = ['Author', 'Number of affiliations', 'Affiliations']
    authorTable.insert(0, list_header)
    csv_tools.write_table_csv(exportFile, authorTable)

## Test with one file
#if __name__ == "__main__":
#csvFile = 'TimonRabczuk_citation_by_authors.csv'
#exportFile = 'list_all_authors.csv'
#list_author_affi = []
## For each CSV file
#table = csv_tools.read_csv_table(csvFile)
#headerTable = table[0]
#if 'Authors with affiliations' in headerTable:
#affi_index = headerTable.index('Authors with affiliations')
#affi_data = [row[affi_index] for row in table]
##print('List of authors and affiliations: \n')
##print(affi_data[1922])
##test = parse_author_affi(affi_data[1922])

Example #8

    else:
        exportFile = 'all_database.csv'

    #merge_scopus_data(dataFolder, exportFile)
    merged_table = []
    processed_papers = []

    for dirpath, dirs, files in os.walk(dataFolder):
        for filename in files:
            if filename[-4:].upper() == '.CSV':
                csvFile = os.path.join(dirpath, filename)
                # For each CSV file
                print('Processing file ' + csvFile)
                table = csv_tools.read_csv_table(csvFile)
                headerTable = table[0]
                if 'Title' in headerTable:
                    title_index = headerTable.index('Title')
                    for row in table[1:]:
                        if row[title_index] not in processed_papers:
                            processed_papers.append(row[title_index])
                            merged_table.append(row)

    # This command should be executed after all the papers were collected
    merged_table.sort(key=lambda x: x[title_index])  # sort by title; assumes all data files share the same header
    merged_table.insert(0, headerTable)
    csv_tools.write_table_csv(exportFile, merged_table)
    #print(merged_table)
    print('Merging finished. Result is written to file ' + exportFile)
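
A side note on the extension test above: filename[-4:].upper() == '.CSV' works, but os.path.splitext states the intent more directly. A small equivalent check, as a sketch:

import os

def is_csv_file(filename):
    # True if the file name ends with .csv, case-insensitively
    return os.path.splitext(filename)[1].lower() == '.csv'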