def updateData(excel_file, filepath_prefix='/AIAA-FULLTXT/AIAA2018004/PDF/'):
    """Rewrite the filename, filesize and issuedate columns and save a copy.

    The columns are located with getIndexByTitle(). For every data row the
    'filename' cell becomes ``filepath_prefix`` plus the original value with
    backslashes turned into forward slashes, the 'filesize' cell is passed
    through fileSizeChange() and the 'issuedate' cell through transTime().
    The modified workbook is saved next to the input with the extension
    replaced by '.new.xls', and that path is printed.

    Args:
        excel_file: Path of the workbook to read.
        filepath_prefix: Prefix prepended to every filename cell. Defaults
            to the batch path that used to be hard-coded here.

    Returns:
        None. When any of the three column indexes is 0 the input path is
        printed and nothing is written.
    """
    # Local import: the file's import block is not visible from this chunk.
    import os

    index_filename, nrows, ncols = getIndexByTitle(excel_file, title='filename')
    index_filesize, nrows, ncols = getIndexByTitle(excel_file, title='filesize')
    index_issue_date, nrows, ncols = getIndexByTitle(excel_file, title='issuedate')

    # NOTE(review): an index of 0 is presumably getIndexByTitle's "title not
    # found" result - confirm against that helper.
    if index_filename == 0 or index_filesize == 0 or index_issue_date == 0:
        print(excel_file)
        return

    book = xlrd.open_workbook(excel_file)
    wtbook = xlutils.copy.copy(book)
    wtsheet = wtbook.get_sheet(0)
    table_list = excelTableByIndex(excel_file)

    # Sheet row i (row 0 is skipped) is paired with table_list[i - 1].
    for i in range(1, nrows):
        row = table_list[i - 1]
        new_filepath = filepath_prefix + row['filename'].replace('\\', '/')
        new_filesize = fileSizeChange(row['filesize'])
        new_issue_date = transTime(row['issuedate'])
        wtsheet.write(i, index_filename, new_filepath)
        wtsheet.write(i, index_filesize, new_filesize)
        wtsheet.write(i, index_issue_date, new_issue_date)

    # splitext() strips only the final extension; splitting on the first '.'
    # truncated paths such as './book.xlsx' or 'Z:/v1.2/book.xlsx'.
    filename = os.path.splitext(excel_file)[0] + '.new.xls'
    print(filename)
    wtbook.save(filename)
def main():
    """Run the SPIE 2018005 import using the hard-coded settings below.

    Steps 1 and 2 are kept as commented-out calls that are enabled by hand
    when needed. The active part dispatches on ``report_type`` to update
    s_source, converts the '.new.xls' workbook into records, prints how many
    there are, inserts them and finally updates the docid field.
    """
    # Batch settings.
    report_name = 'SPIE'
    report_type = 'QK'
    excel_file = 'Z:/ALLPDF/SPIE-FULLTXT/2018005/SPIE期刊.xlsx'
    new_excel_file = 'Z:/ALLPDF/SPIE-FULLTXT/2018005/SPIE期刊.new.xls'
    actual_filepath = 'Z:/ALLPDF/SPIE-FULLTXT/2018005/QK'
    filepath_in_computer = '/SPIE-FULLTXT/2018005/QK'

    table_list = excelTableByIndex(excel_file)

    # Step 1: check that the files exist; continue only when none are missing.
    # fileExist(table_list, actual_filepath)

    # Step 2: update the fields and produce a new Excel file whose name
    # contains '.new'; the database inserts below use that generated .xls.
    # Comment this call out again once the new .xls file exists.
    # updateData(excel_file, table_list, report_type, filepath_in_computer)

    # Step 3: update the data in s_source (journals use finalJournal,
    # meetings use finalMeeting); comment out when done.
    if report_type == 'QK':
        finalJournal(new_excel_file)
    elif report_type == 'HY':
        finalMeeting(new_excel_file)
    elif report_type == 'report':
        finalReport(new_excel_file)
    elif report_type == 'huibian':
        finalHuibian(new_excel_file)

    # Step 4: read the generated Excel file, pick the needed fields and
    # convert them to the standard format.
    records = excelDataChange(new_excel_file, report_type, report_name)
    print(len(records))

    # Step 5: insert the new data into the corresponding database table.
    insertNewDataToS_data(records, report_name)

    # Update the docid field based on the id field.
    changeDocidById(report_name)
def mainUpdateData(self):
    """Generate the updated Excel file from the values held on ``self``.

    Reads the field mapping from mainGetFieldToFieldDict(), takes the Excel
    path and the target path from ``self.excel_filepath`` and
    ``self.filepath_in_computer`` (backslashes become forward slashes),
    calls updateData() and then shows an info message through ``msgb``.
    """
    mapping = self.mainGetFieldToFieldDict()

    raw_excel_path = self.excel_filepath.get()
    workbook_path = raw_excel_path.replace('\\', '/')
    rows = excelTableByIndex(workbook_path)

    raw_target_path = self.filepath_in_computer.get()
    target_path = raw_target_path.replace('\\', '/')

    updateData(workbook_path, rows, mapping, target_path)
    msgb.showinfo('congragulate!', 'success generate a new excel file!')
def mainFileExist(self):
    """Report through ``msgb`` whether fileExist() accepts the listed files.

    The Excel path and the directory path come from ``self.excel_filepath``
    and ``self.actual_filepath``; backslashes are replaced with forward
    slashes before use. An info message is shown when fileExist() returns a
    truthy value, an error message otherwise.
    """
    workbook_path = self.excel_filepath.get().replace('\\', '/')
    files_root = self.actual_filepath.get().replace('\\', '/')
    rows = excelTableByIndex(workbook_path)

    if not fileExist(rows, files_root):
        msgb.showerror('error!', 'some file not exist, please check it!')
        return
    msgb.showinfo('congragulate!', 'all file exist!')
def a():
    """Set pubdate to '0000-00-00' in s_source for every row of the sheet.

    Reads the hard-coded SPIE 2018004 meeting workbook, prints the number of
    rows and, for each row, runs an UPDATE on s_source matching sourcename
    against the row's 'MeetingName' value. Each row is committed on its own;
    a failing statement is printed and rolled back, and processing continues
    with the next row.

    Returns:
        None.
    """
    db, cursor = connectDatabase()
    try:
        new_excel_file = 'Z:/ALLPDF/SPIE-FULLTXT/2018004/SPIE会议.new.xls'
        table_list = excelTableByIndex(new_excel_file)
        print(len(table_list))

        # Values are bound by the driver instead of being pasted into the
        # SQL text, so names containing quotes no longer break the statement.
        # NOTE(review): assumes connectDatabase() returns a driver that uses
        # '%s' placeholders (e.g. MySQLdb / PyMySQL) - confirm.
        sql = 'update s_source set pubdate=%s where sourcename=%s'

        for t in table_list:
            # str() keeps the value textual, as the old '%s' formatting did.
            params = ('0000-00-00', str(t['MeetingName']))
            try:
                cursor.execute(sql, params)
                db.commit()
            except Exception as e:
                print(e)
                db.rollback()
    finally:
        # Release the cursor and connection even when reading the sheet or
        # a row lookup raises.
        cursor.close()
        db.close()
def excelDataChange(excel_file):
    """Convert the rows of ``excel_file`` into a list of record dicts.

    Each row returned by excelTableByIndex() yields one dict. The 'sid'
    value comes from getSidFromS_sourceByMeetingname() applied to the row's
    'journalname', 'cid' from getCid(), and three fields are constants
    ('encryptlevel', 'language', 'docmedia'). Title, author and keyword are
    clipped with a limit of 200 and organ with a limit of 500; 'pages' is
    converted with int(), and 'filename' is the part of the row's filename
    after the last '/'.

    Args:
        excel_file: Path of the workbook to read.

    Returns:
        list of dict, one entry per row, in row order.
    """
    def clip(text, limit):
        # A value longer than `limit` keeps only its first `limit - 1`
        # characters; anything else is returned unchanged.
        return text[0:limit - 1] if len(text) > limit else text

    records = []
    for row in excelTableByIndex(excel_file):
        records.append({
            'sid': getSidFromS_sourceByMeetingname(row['journalname']),
            'cid': getCid(),
            'year': row['year'],
            'vol': row['volume'],
            'encryptlevel': '1',
            'language': 'eng',
            'docmedia': 'P',
            'doi': row['DOI'],
            'mtitle': clip(row['title'], 200),
            'authors': clip(row['author'], 200),
            'authorunit': clip(row['organ'], 500),
            'keyword': clip(row['keyword'], 200),
            'abstracts': row['abstract'],
            'pages': int(row['pages']),
            'bepage': row['strpage'],
            'filename': row['filename'].split('/')[-1],
            'filepath': row['filename'],
            'filesize': row['filesize'],
        })
    return records
def main():
    """Run the AIAA 2018004 journal import using the hard-coded paths below.

    Steps 1 to 3 are kept as commented-out calls that are enabled by hand
    when needed. The active part converts the '.new.xls' workbook into
    records, prints how many there are, inserts them and finally updates
    the docid field.
    """
    source_workbook = 'Z:/ALLPDF/AIAA-FULLTXT/AIAA2018004/AIAA期刊2.xlsx'
    pdf_directory = 'Z:/ALLPDF/AIAA-FULLTXT/AIAA2018004/PDF/'
    rows = excelTableByIndex(source_workbook)

    # Step 1: check that the files exist; once it succeeds this call may be
    # commented out (or left enabled).
    # fileExist(rows, pdf_directory)

    # Step 2: update the fields and produce a new Excel file whose name
    # contains '.new'; the database inserts below use that generated .xls.
    # Comment this call out again once the new .xls file exists.
    # updateData(source_workbook)
    generated_workbook = 'Z:/ALLPDF/AIAA-FULLTXT/AIAA2018004/AIAA期刊2.new.xls'

    # Step 3: update the data in s_source (journals use finalJournal,
    # meetings use finalMeeting); comment out when done.
    # finalJournal(generated_workbook)

    # Step 4: read the generated Excel file, pick the needed fields and
    # convert them to the standard format.
    records = excelDataChange(generated_workbook)
    print(len(records))

    # Step 5: insert the new data into the corresponding database table.
    insertNewDataToS_data(records)

    # Update the docid field based on the id field.
    changeDocidById()
def finalJournal(excel_filename):
    """Pass the journals found in ``excel_filename`` to insertS_sourceByJournal().

    The list from getSourcenameFromEisc_data() is fetched first, then the
    workbook rows are reduced to a journal list with getJournalList(), and
    both are handed to insertS_sourceByJournal().
    """
    # Fetched before the workbook is read, matching the original call order.
    known_sources = getSourcenameFromEisc_data()
    journals = getJournalList(excelTableByIndex(excel_filename))
    insertS_sourceByJournal(journals, known_sources)
def finalMeeting(excel_filename):
    """Pass the meetings found in ``excel_filename`` to insertS_sourceByMeeting().

    The list from getSourcenameFromEisc_data() is fetched first, then the
    workbook rows are reduced to a meeting list with getMeetinglist(), and
    both are handed to insertS_sourceByMeeting().
    """
    # Fetched before the workbook is read, matching the original call order.
    known_sources = getSourcenameFromEisc_data()
    meetings = getMeetinglist(excelTableByIndex(excel_filename))
    insertS_sourceByMeeting(meetings, known_sources)