Esempio n. 1
0
def set_discontinue_journal(url):
    '''
    Record a journal link that does not need to be crawled.

    :param url: journal link forwarded to ``save_discontiune_journal``.
    :return: None
    '''
    manager = name_manager()
    manager.save_discontiune_journal(url)
Esempio n. 2
0
def create_and_save_execel(section, *name):
    '''
    Create an Excel workbook and write the article data of ``section`` into it.

    Records are fetched one at a time with ``get_article_data(section)`` until
    it returns ``None``. Each record is parsed as JSON and written to its own
    row, starting at row 2 (``write_first_line`` is called on the sheet first).
    Only keys that have an entry in ``Row_Name.COLUME_NUM`` are written.

    :param section: key passed to ``get_article_data``; also used to build the
        file name when ``name`` is not supplied.
    :param name: optional; when given, ``name[0]`` replaces ``section`` as the
        input to ``create_execl_name``.
    :return: None. The workbook is saved to ``EXECEL_PATH + execel_name``.
    '''
    execel_name = create_execl_name(name[0] if name else section)
    # Drops the last five characters of the file name -- presumably the
    # ".xlsx" extension; confirm against create_execl_name.
    excel_label = execel_name[:-5]

    wb = openpyxl.Workbook()
    sheet = wb.create_sheet("sheet1", 0)
    write_first_line(sheet)

    # One manager for the whole export instead of a new one per row.
    nm = name_manager()
    line = 2
    while True:
        article_data = nm.get_article_data(section)
        if article_data is None:
            break
        article = json.loads(article_data)
        sheet.cell(line, Row_Name.COLUME_NUM.get(Row_Name.EXCELNAME) + 1, excel_label)
        # Serial number is 1-based: the first data row (row 2) gets 1.
        sheet.cell(line, Row_Name.COLUME_NUM.get(Row_Name.SERIAL_NUMBER) + 1, line - 1)
        for key, value in article.items():
            num = Row_Name.COLUME_NUM.get(key)
            if num is None:
                continue
            try:
                sheet.cell(line, num + 1, value)
            except Exception:
                # Best effort: report the cell that could not be written and
                # keep going, without swallowing KeyboardInterrupt/SystemExit.
                print("错误:", line, num + 1, value)
        line += 1

    wb.save(EXECEL_PATH + execel_name)
Esempio n. 3
0
def write_log(num, file_path):
    '''
    Append every pending error message to a log file, one per line.

    Messages are fetched repeatedly until the getter returns ``None``.

    :param num: selects the message source: 1 uses
        ``get_journal_error_massage``, 2 uses ``get_article_error_massage``.
    :param file_path: path of the log file; opened in append mode as UTF-8.
    :return: None
    :raises ValueError: if ``num`` is neither 1 nor 2.
    '''
    # Reject an unknown selector before touching the file or the manager;
    # previously this surfaced as an UnboundLocalError inside the loop.
    if num not in (1, 2):
        raise ValueError("num must be 1 (journal) or 2 (article), got %r" % (num,))

    nm = name_manager()
    fetch = nm.get_journal_error_massage if num == 1 else nm.get_article_error_massage

    # The context manager guarantees the file is flushed and closed.
    with open(file_path, "a+", encoding="utf-8") as log_file:
        while True:
            temp_data = fetch()
            if temp_data is None:
                break
            log_file.write(temp_data + "\n")
Esempio n. 4
0
def run_article_error(file):
    '''
    Re-run every article-level link that previously failed.

    Each non-blank line of ``file`` is parsed as JSON; element ``[1]`` of the
    parsed value is itself a JSON string holding the failed record. Records are
    de-duplicated on "<journal title>_<volume>_<issue>" (first one wins). For
    each remaining record the module ``journals.website.<publisher>`` is
    imported, and its ``article`` class is used to call ``do_run`` and then
    ``save_data``. Finally ``write_data`` is called with the publishers seen.

    :param file: path of the error file to read.
    :return: None
    '''
    # Optional keys are copied only when present; the others are required and
    # raise KeyError when missing.
    optional_keys = (Row_Name.EISSN, Row_Name.ISSN)
    required_keys = (
        Row_Name.JOURNAL_TITLE,
        Row_Name.PUBLISHER,
        Row_Name.STRING_COVER_DATE,
        Row_Name.YEAR,
        Row_Name.VOLUME,
        Row_Name.ISSUE,
        Row_Name.TEMP_URL,
    )

    errs = {}
    pubs = {}
    # Result is unused here; the call is kept in case constructing the manager
    # has side effects -- TODO confirm and drop.
    name_manager()

    # NOTE(review): opened with the platform default encoding, as before;
    # confirm whether this file is always UTF-8.
    with open(file) as error_file:
        for line in error_file:
            if not line.strip():
                # A blank (e.g. trailing) line is not a record.
                continue
            temp = json.loads(json.loads(line)[1])
            print(temp)

            new_dict = {key: temp[key] for key in optional_keys if key in temp}
            for key in required_keys:
                new_dict[key] = temp[key]

            string = "_".join((
                new_dict[Row_Name.JOURNAL_TITLE],
                new_dict[Row_Name.VOLUME],
                new_dict[Row_Name.ISSUE],
            ))
            errs.setdefault(string, new_dict)

    for issue in errs.values():
        pub = issue[Row_Name.PUBLISHER]
        pubs[pub] = 1
        # fromlist must be a sequence of names; a non-empty one makes
        # __import__ return the leaf module rather than the top-level package.
        pyfile = __import__("journals.website." + pub, fromlist=["article"])
        ac = pyfile.article
        # A fresh instance is used for each call, as in the original.
        ais = ac().do_run(issue)
        ac().save_data(ais, issue)

    write_data(pubs)
Esempio n. 5
0
 def __init__(self):
     '''Start with an empty ``method`` list and a ``name_manager`` instance in ``nm``.'''
     self.method = []
     self.nm = name_manager()
Esempio n. 6
0
 def __init__(self):
     '''Start with an empty ``list`` attribute and a ``name_manager`` instance in ``nm``.'''
     self.list = []
     self.nm = name_manager()
Esempio n. 7
0
 def __init__(self, website):
     '''
     Store ``website`` and create a ``name_manager`` instance in ``nm``.

     :param website: value kept unchanged on ``self.website``.
     '''
     self.website = website
     self.nm = name_manager()
Esempio n. 8
0
 def __init__(self, section, items, update):
     '''
     Initialise the thread and store the constructor arguments.

     :param section: kept on ``self.section``.
     :param items: kept on ``self.items``.
     :param update: kept on ``self.update``.
     '''
     # The Thread base initialiser runs before any attribute is set.
     threading.Thread.__init__(self)
     self.section, self.items, self.update = section, items, update
     self.nm = name_manager()