Example #1
def browse_aps(path_, database_file):

    #    header = ['#','doi', 'journal', 'year', 'country']

    list_ = []
    fo = open(database_file, 'w')
    #    fo.write( " ".join(header) + "\n" )
    for root, dirs, files in os.walk(path_):
        for name in files:
            if name.endswith((".json")):
                jfile = root + "/" + name

                affiliations_ = get_all_affiliations(jfile)
                year_ = get_year_jsonfile(jfile)
                journal_ = get_journal_short_json(jfile)
                doi_ = get_doi(jfile)

                countries_list = []
                for aff_ in affiliations_:
                    if len(aff_) > 0:
                        try:
                            country_ = extract_country(aff_)
                            if len(country_) > 0:
                                countries_list.append(country_)

                        except Exception:
                            continue

                countries_list = list(set(countries_list))

                if len(countries_list) > 0:
                    for c_ in countries_list:
                        c_parts = c_.split()
                        record = [doi_, journal_, year_, ".".join(c_parts)]
                        fo.write(" ".join(record) + "\n")
Example #2
def browse_aps(path_, database_file):

    #    header = ['#','doi', 'journal', 'year', 'country']

    list_ = []
    fo = open(database_file, 'w')
    #    fo.write( " ".join(header) + "\n" )
    for root, dirs, files in os.walk(path_):
        for name in files:
            if name.endswith(".json"):
                jfile = root + "/" + name

                coauthors_number_ = len(get_coauthors_jsonfile(jfile))
                year_ = get_year_jsonfile(jfile)
                journal_ = get_journal_short_json(jfile)
                doi_ = get_doi(jfile)

                if coauthors_number_ > 0:
                    record = [doi_, journal_, year_, str(coauthors_number_)]
                    fo.write(" ".join(record) + "\n")

    fo.close()

    return True
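
A minimal usage sketch for this variant (paths are hypothetical, and the helper functions are assumed to be importable from the surrounding module): each line written to the output file holds the DOI, journal code, year, and coauthor count.

# Hypothetical paths; adjust to the local APS corpus layout.
browse_aps("/data/aps-corpus", "coauthor_counts.txt")

# Read the space-separated records back: <doi> <journal> <year> <num_coauthors>
with open("coauthor_counts.txt") as fh:
    for line in fh:
        doi_, journal_, year_, n_authors = line.split()
        print(journal_, year_, n_authors)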
Example #3
def browse_papers(path_, csv_file, fout):
    fo = open(fout, 'w')
    print("Processing citations ...")
    dict_1, dict_2 = parse_csv_file(csv_file)

    print("Processing files ...")

    tmp_list = []
    for root, dirs, files in os.walk(path_):
        for name in files:
            if name.endswith((".json")):
                jfile = root + "/" + name
                data = json.load(open(jfile))

                year, month, day = get_date_jsonfile(jfile, data)
                journal = get_journal_short_json(jfile, data)
                issue, volume = get_issue_volume(jfile, data)
                doi = get_doi(jfile, data)
                num_pages = get_number_of_pages(jfile, data)
                coauthors = get_coauthors_jsonfile(jfile, data)
                affiliations = get_all_affiliations(jfile, data)
                countries = get_all_countries(jfile, data)
                title = get_title(jfile, data)

                str_out = ""
                str_out += str(year) + " "
                str_out += str(month) + " "
                str_out += str(day) + " "
                str_out += str(journal) + " "
                str_out += str(issue) + " "
                str_out += str(volume) + " "
                str_out += str(doi) + " "
                str_out += str(len(coauthors)) + " "
                str_out += str(len(affiliations)) + " "
                str_out += str(len(countries)) + " "
                str_out += str(len(title)) + " "
                str_out += str(num_pages) + " "

                if doi in dict_1.keys():
                    str_out += str(len(dict_1[doi])) + " "
                else:
                    str_out += str(0) + " "

                if doi in dict_2.keys():
                    str_out += str(len(dict_2[doi])) + " "
                else:
                    str_out += str(0) + " "

                fo.write(str_out + "\n")

    fo.close()
Example #4
def browse_papers(path_, dict_):

    for root, dirs, files in os.walk(path_):
        for name in files:
            if name.endswith((".json")):
                jfile = root + "/" + name

                year = get_year_jsonfile(jfile)
                j_id = get_journal_short_json(jfile)
                if j_id in dict_:
                    if year in dict_[j_id]:
                        dict_[j_id][year] += 1
                    else:
                        dict_[j_id][year] = 1
                else:
                    dict_[j_id] = {}
                    dict_[j_id][year] = 1

    return dict_
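
A usage sketch (corpus path hypothetical): the function is seeded with a dict, typically empty, and returns per-journal, per-year paper counts.

# Hypothetical corpus path.
counts = browse_papers("/data/aps-corpus", {})

# counts is a nested dict: counts[journal][year] -> number of papers.
for j_id, by_year in counts.items():
    for year in sorted(by_year):
        print(j_id, year, by_year[year])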
Example #5
def browse_papers(path_, csv_file, xmin=60):
    print("Processing citations ...")
    dict_1, dict_2 = parse_csv_file(csv_file)

    print("Processing files ...")

    papers_list = {}
    for root, dirs, files in os.walk(path_):
        for name in files:
            if name.endswith(".json"):
                jfile = root + "/" + name
                data = json.load(open(jfile))

                year, month, day = get_date_jsonfile(jfile, data)
                journal = get_journal_short_json(jfile, data)
                issue, volume = get_issue_volume(jfile, data)
                coauthors = get_coauthors_jsonfile(jfile, data)
                title = get_title(jfile, data)
                doi_ = get_doi(jfile, data)

                if doi_ in dict_1.keys():
                    cits_ = len(dict_1[doi_])
                else:
                    cits_ = 0

                if doi_ in dict_2.keys():
                    refs_ = len(dict_2[doi_])
                else:
                    refs_ = 0

                if cits_ >= xmin:
                    papers_list[doi_] = [title.encode('utf-8'), str(journal), str(year),
                                         str(volume), str(issue), str(cits_), str(refs_)]

    print("Database processed ...")
    return papers_list
Example #6
def browse_papers(path_, csv_file):
    print("Processing citations ...")
    dict_1, dict_2 = parse_csv_file(csv_file)

    #    client = MongoClient('localhost', 27017)
    client = MongoClient()
    db = client['apsdb']  # Get a database
    aps = db['aps-articles-basic']  # Get a collection

    print("Removing all records ...")
    aps.delete_many({})  # Clean the collection

    print("Processing files ...")

    tmp_list = []
    for root, dirs, files in os.walk(path_):
        for name in files:
            if name.endswith((".json")):
                jfile = root + "/" + name
                data = json.load(open(jfile))

                year, month, day = get_date_jsonfile(jfile, data)
                journal = get_journal_short_json(jfile, data)
                issue, volume = get_issue_volume(jfile, data)
                doi = get_doi(jfile, data)
                num_pages = get_number_of_pages(jfile, data)
                coauthors = get_coauthors_jsonfile(jfile, data)
                affiliations = get_all_affiliations(jfile, data)
                countries = get_all_countries(jfile, data)
                title = get_title(jfile, data)

                aps_paper = {'year': year, 'month': month, 'day': day}
                aps_paper['journal'] = journal
                aps_paper['issue'] = issue
                aps_paper['volume'] = volume
                aps_paper['doi'] = doi
                aps_paper['num_authors'] = len(coauthors)
                aps_paper['num_affs'] = len(affiliations)
                aps_paper['num_countries'] = len(countries)
                aps_paper['title'] = title
                aps_paper['title_length'] = len(title)

                aps_paper['num_pages'] = num_pages

                if doi in dict_1.keys():
                    aps_paper['citations'] = len(dict_1[doi])
                else:
                    aps_paper['citations'] = 0

                if doi in dict_2.keys():
                    aps_paper['num_references'] = len(dict_2[doi])
                else:
                    aps_paper['num_references'] = 0

                tmp_list.append(aps_paper)
                if len(tmp_list) > BIG_LIST_SIZE:  # BIG_LIST_SIZE: batch size, assumed to be a module-level constant
                    aps.insert_many(tmp_list)
                    tmp_list = []

    if len(tmp_list) > 0:
        aps.insert_many(tmp_list)
        tmp_list = []

    return aps
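
A usage sketch, assuming a MongoDB server running on localhost and a citations CSV in the format expected by parse_csv_file (both paths hypothetical). The returned collection can be queried with standard pymongo calls.

# Hypothetical inputs; requires a running local MongoDB instance.
aps = browse_papers("/data/aps-corpus", "citations.csv")

print("papers stored:", aps.count_documents({}))

# Five most-cited papers in the collection.
for paper in aps.find({'citations': {'$gt': 0}}).sort('citations', -1).limit(5):
    print(paper['doi'], paper['year'], paper['citations'])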
Example #7
def browse_papers(path_, csv_file, ofiles):
    BUFF_SIZE = 1  # line-buffered; unbuffered (0) is not valid for text-mode files in Python 3
    of1, of2, of3, of4, of5 = ofiles

    f1 = open(of1, 'w', BUFF_SIZE, encoding='utf-8')
    f2 = open(of2, 'w', BUFF_SIZE, encoding='utf-8')
    f3 = open(of3, 'w', BUFF_SIZE, encoding='utf-8')
    f4 = open(of4, 'w', BUFF_SIZE, encoding='utf-8')
    f5 = open(of5, 'w', BUFF_SIZE, encoding='utf-8')

    print("Processing citations ...")
    dict_1, dict_2 = parse_csv_file(csv_file)

    print("Processing files ...")

    tmp_list = []
    for root, dirs, files in os.walk(path_):
        for name in files:
            if name.endswith(".json"):
                jfile = root + "/" + name
                data = json.load(open(jfile))

                try:
                    year, _, _ = get_date_jsonfile(jfile, data)
                    journal = get_journal_short_json(jfile, data)
                    doi = get_doi(jfile, data)
                    title = get_clean_title(jfile, data)

                    if doi in dict_1.keys():
                        cits = len(dict_1[doi])
                    else:
                        cits = 0

                    abstract = get_clean_abstract(jfile,data)
                    abstract = abstract.replace('\n', ' ').replace('\r', '')

                    # Force every conversion up front so that any failure is
                    # raised before a single line is written, keeping the five
                    # output files line-aligned with each other.
                    _ = str(journal)
                    _ = str(year)
                    _ = str(cits)
                    _ = abstract.encode('utf-8')
                    _ = title.encode('utf-8')

                    f1.write(str(journal) + "\n")
                    f2.write(str(year) + "\n")
                    f3.write(str(cits) + "\n")
                    f4.write(abstract + "\n")
                    f5.write(title + "\n")

                except KeyError as e:
                    print('KeyError', e)
                except IOError as e:
                    print('IOError', e)
                except UnicodeEncodeError as e:
                    print('UnicodeEncodeError', e)

    f1.close()
    f2.close()
    f3.close()
    f4.close()
    f5.close()
    return
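
A usage sketch, with hypothetical file names: the function takes a 5-tuple of output paths and writes one record per paper to each, so the i-th line of every file refers to the same paper.

# Hypothetical paths; the five files stay line-aligned with each other.
ofiles = ("journals.txt", "years.txt", "citations.txt", "abstracts.txt", "titles.txt")
browse_papers("/data/aps-corpus", "citations.csv", ofiles)

# The files can then be read in parallel, e.g. years against citation counts.
with open("years.txt") as fy, open("citations.txt") as fc:
    for year_, cits_ in zip(fy, fc):
        print(year_.strip(), cits_.strip())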