def do_POST(self):
    print("incoming http:", self.path)
    if self.path.startswith("/export"):
        # Parse the POSTed form fields (userid/password).
        form = cgi.FieldStorage(
            fp=self.rfile,
            headers=self.headers,
            environ={
                'REQUEST_METHOD': 'POST',
                'CONTENT_TYPE': self.headers['Content-Type'],
            })
        userid = form["userid"].value
        password = form["password"].value
        self.exporter = Exporter()
        # self.exporter = FakeLogic()
        exp = self.exporter
        exp.login(userid, password)

        # Run the export in a background thread so the response is
        # sent immediately.
        def work():
            self.exporter.getLikes()
            for fname in ["full.json", "neat.json"]:
                jsonThumbData[fname] = self.exporter.getJson(fname)

        worker = Thread(target=work)
        worker.start()
    self.send_response(200)
    self.send_header("Content-type", "text/plain")
    self.end_headers()
    self.wfile.write(bytes("", "utf-8"))
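# A minimal sketch of how a handler like the do_POST above is typically
# served, assuming it is defined on a http.server.BaseHTTPRequestHandler
# subclass (the rfile/wfile/send_response calls match that API). The class
# name ExportHandler and the port are illustrative assumptions. Note that the
# cgi module used above is deprecated since Python 3.11 and removed in 3.13.
from http.server import BaseHTTPRequestHandler, HTTPServer


class ExportHandler(BaseHTTPRequestHandler):
    def do_POST(self):
        ...  # handler body as above


if __name__ == "__main__":
    HTTPServer(("localhost", 8080), ExportHandler).serve_forever()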
def main():
    try:
        lib = Library(conf.library_dir)
        if conf.action == "clean":
            cleaner = Cleaner(lib)
            print("Removing duplicates...")
            cleaner.remove_duplicates()
            print("DONE")
            cleaner.report()
            if not conf.test:
                lib.flush()
                print("\nTraktor library updated.")
            else:
                print("\nTest run. No changes made to the library.")
        elif conf.action == "export":
            exporter = Exporter(lib, conf.export_dir)
            exporter.export()
    except Exception as e:
        logger.error(e, exc_info=False)
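# A possible shape for the `conf` object main() relies on. Only the attribute
# names (library_dir, action, test, export_dir) are visible above; this
# argparse sketch filling in the rest is an assumption, not the original
# configuration code.
import argparse

parser = argparse.ArgumentParser(description="Traktor library tool")
parser.add_argument("action", choices=["clean", "export"])
parser.add_argument("--library-dir", dest="library_dir", required=True)
parser.add_argument("--export-dir", dest="export_dir")
parser.add_argument("--test", action="store_true",
                    help="dry run: report but do not modify the library")
conf = parser.parse_args()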
# import openpyxl
# wb = openpyxl.Workbook()
# ws = wb.create_sheet('今日统计', 0)  # sheet name: "Today's statistics"
#
# for irow in range(100):
#     ws.append(['%d' % i for i in range(20)])
# wb.save(filename='test.xlsx')

from export import Exporter

e = Exporter()
e.export('2016-10-23')
if os.path.exists(input_folder):
    if os.path.isdir(input_folder):
        extractor.scan_folder(input_folder, extracted_info)
    else:
        logging.info(
            "The '{0}' input folder is a file!".format(input_folder))
else:
    logging.info(
        "The '{0}' input folder does not exist!".format(input_folder))

if not extracted_info:
    logging.critical("No information could be extracted.")
    sys.exit(1)
else:
    exporter = Exporter(basename=args.basename,
                        output_folder=output_folder)
    if args.consolidate:
        exporter.export_to_xlsx_consolidated(extracted_info)
    else:
        exporter.export_to_xlsx(extracted_info)
def _export(self, destination):
    Exporter(Library.instance(), destination).export()
def export(trans_date):
    e = Exporter()
    filename = e.export(trans_date)
    return send_from_directory('excel', filename, as_attachment=True)
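# send_from_directory is a Flask helper, so export() above is presumably a
# Flask view; a minimal wiring sketch, where the app instance and the URL
# pattern are assumptions:
from flask import Flask

app = Flask(__name__)
app.add_url_rule('/export/<trans_date>', view_func=export)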
def _detect_identifier_changes(self, folder_path, all_products_path,
                               new_products_path, additions_path,
                               errors_exporter, identifier_changes_path):
    if not os.path.exists(all_products_path):
        return

    # Step 1: full join of all known products against the new crawl,
    # splitting the differences into deletions and additions.
    all_f = SortedFile(all_products_path, sort_products_file,
                       reader=CSVReader)
    new_products_f = SortedFile(new_products_path, sort_products_file,
                                reader=CSVReader)
    deletions_path = os.path.join(folder_path, 'deletions.csv')
    deletions = open(deletions_path, 'w')
    total_additions_path = os.path.join(folder_path, 'total_additions.csv')
    total_additions = open(total_additions_path, 'w')
    sorted_join = SortedJoin(all_f, new_products_f)
    header = ('identifier,name,url,price,old_price,status,sku,category'
              ',brand,image_url,shipping_cost,stock,dealer')
    deletions_exporter = Exporter(
        output_file=deletions,
        header=header,
        format_func=lambda result: export_status(Change.OLD, result))
    additions_exporter = Exporter(
        output_file=total_additions,
        header=header,
        format_func=lambda result: export_status(Change.NEW, result))
    join_function1 = JoinFunction(
        PriceChange, [deletions_exporter, additions_exporter],
        settings=self.settings)
    sorted_join.full_join(
        join_function1,
        lambda x, y: locale.strcoll(x['identifier'], y['identifier']))
    deletions.close()
    total_additions.close()

    # Step 2: keep only the additions that also appear in this run's
    # additions file (inner join), writing them back to additions_path.
    add_temp = tempfile.mktemp()
    add_temp_f = open(add_temp, 'w')
    total_additions = SortedFile(total_additions_path, sort_products_file,
                                 reader=CSVReader)
    additions = SortedFile(additions_path, sort_products_file,
                           reader=CSVReader)
    real_additions_exporter = Exporter(
        output_file=add_temp_f,
        header=header,
        format_func=lambda result: export_status(Change.NO_CHANGE, result))
    join_func = JoinFunction(PriceChange, [real_additions_exporter],
                             settings=self.settings)
    sorted_join = SortedJoin(total_additions, additions)
    sorted_join.inner_join(
        join_func,
        lambda x, y: locale.strcoll(x['identifier'], y['identifier']))
    add_temp_f.close()
    shutil.copy(add_temp, additions_path)

    # Step 3: re-sort both files by product hash, drop non-unique hashes,
    # and inner-join deletions against additions on product_hash: a pair
    # with the same hash but different identifiers is an identifier change.
    temp_ = tempfile.mktemp()
    sort_products_file(deletions_path, temp_, hash_func=ProductHash.hash)
    remove_non_unique_hashes(temp_)
    shutil.move(temp_, deletions_path)
    deletions = SortedFile(deletions_path,
                           lambda x, y: shutil.copy(x, y),
                           reader=CSVReader)
    additions = SortedFile(
        additions_path,
        lambda x, y: sort_products_file(x, y, hash_func=ProductHash.hash),
        reader=CSVReader)
    sorted_join = SortedJoin(deletions, additions)
    ident_changes = open(identifier_changes_path, 'w')

    def fmt(r):
        return to_csv([
            r.new_element['name'], r.new_element['identifier'],
            r.new_element['url'], r.old_element['identifier'],
            r.old_element['url']
        ])

    identifier_exporter = Exporter(
        output_file=ident_changes,
        header='name,new_identifier,new_url,old_identifier,old_url',
        format_func=fmt)
    join_function = JoinFunction(IdentifierChange, [
        IdentifierChangeValidator(errors_exporter, settings=self.settings),
        IdentifierChangeValidator(identifier_exporter,
                                  settings=self.settings)
    ], settings=self.settings)
    sorted_join.inner_join(
        join_function,
        lambda x, y: locale.strcoll(x['product_hash'], y['product_hash']))
    ident_changes.close()
    if identifier_exporter.exported_lines == 0:
        os.unlink(identifier_changes_path)
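# Both full_join and inner_join above take a cmp-style comparator, and
# locale.strcoll fits that contract: it returns a negative, zero, or positive
# value. A tiny self-contained check (assuming the locale configured in
# update() below):
import locale

locale.setlocale(locale.LC_ALL, '')
assert locale.strcoll('a', 'b') < 0
assert locale.strcoll('b', 'b') == 0
assert locale.strcoll('c', 'b') > 0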
def update(self, folder_path):
    # Update locale to avoid messing up the rules for sorting strings in
    # python with the rules from the sort command in linux
    locale.setlocale(locale.LC_ALL, '')
    old_products_path = os.path.join(folder_path, 'old_products.csv')
    new_products_path = os.path.join(folder_path, 'new_products.csv')
    all_products_path = os.path.join(folder_path, 'all_products.csv')
    old_meta_path = os.path.join(folder_path, 'old_meta.json-lines')
    new_meta_path = os.path.join(folder_path, 'new_meta.json-lines')
    new_meta_merged_path = os.path.join(folder_path,
                                        'new_meta_merged.json-lines')
    product_changes_path = os.path.join(folder_path, 'changes.csv')
    meta_changes_path = os.path.join(folder_path, 'meta_changes.json-lines')
    additional_changes_path = os.path.join(folder_path,
                                           'additional_changes.json-lines')
    errors_path = os.path.join(folder_path, 'errors.csv')
    identifier_changes_path = os.path.join(folder_path,
                                           'identifier_changes.csv')

    errors_file = open(errors_path, 'w')
    errors_exporter = Exporter(
        output_file=errors_file,
        header='code,error',
        format_func=lambda result: result.format_csv())

    # Bootstrap: if there is no previous snapshot, seed it with just the
    # header line of the new one, and create an empty old metadata file.
    if not os.path.exists(old_products_path):
        with open(old_products_path, 'w') as f:
            with open(new_products_path) as f1:
                line = f1.readline()
                f.write(line)
    if not os.path.exists(old_meta_path) and os.path.exists(new_meta_path):
        with open(old_meta_path, 'w'):
            pass

    old_dups_count = None
    new_dups_count = None
    if os.path.exists(old_products_path):
        old_dups_count = remove_duplicates(old_products_path)
    if os.path.exists(new_products_path):
        new_dups_count = remove_duplicates(new_products_path)
    self._detect_duplicate_identifiers(new_products_path, errors_exporter)

    # Full join of old vs new products: exporters, stats collectors and
    # validators all observe the same join results.
    old_products_file = SortedFile(old_products_path, sort_products_file,
                                   reader=CSVReader)
    new_products_file = SortedFile(new_products_path, sort_products_file,
                                   reader=CSVReader)
    sorted_join = SortedJoin(old_products_file, new_products_file)
    product_changes_file = open(product_changes_path, 'w')
    header = ('identifier,name,url,price,old_price,status,sku,category,'
              'brand,image_url,shipping_cost,stock,dealer')
    changes_exporter = Exporter(
        output_file=product_changes_file,
        header=header,
        format_func=lambda result: result.format_csv())
    additions_path = os.path.join(folder_path, 'additions.csv')
    additions = open(additions_path, 'w')
    additions_exporter = Exporter(
        output_file=additions,
        header=header,
        format_func=lambda result: export_status(Change.NEW, result))
    changes_stats = ChangesStats()
    changes_validators = [
        cls(errors_exporter, settings=self.settings)
        for cls in self.changes_validators
    ]
    join_function1 = JoinFunction(
        PriceChange,
        [changes_exporter, additions_exporter, changes_stats] +
        changes_validators,
        settings=self.settings)

    additional_change_file = open(additional_changes_path, 'w')
    additional_exporter = Exporter(
        output_file=additional_change_file,
        format_func=lambda result: result.format_json())
    additional_stats = AdditionalChangesStats()
    additional_validators = [
        cls(errors_exporter, settings=self.settings)
        for cls in self.additional_changes_validators
    ]
    join_function2 = JoinFunction(
        AdditionalChange,
        [additional_exporter, additional_stats] + additional_validators,
        settings=self.settings)
    join_function = CompositeJoinFunction([join_function1, join_function2])
    sorted_join.full_join(
        join_function,
        lambda x, y: locale.strcoll(x['identifier'], y['identifier']))
    product_changes_file.close()
    additions.close()

    # metadata
    meta_stats = None
    old_meta_dups_count = None
    new_meta_dups_count = None
    if os.path.exists(old_meta_path) and os.path.exists(new_meta_path):
        old_meta_dups_count = remove_duplicates_meta(old_meta_path)
        new_meta_dups_count = remove_duplicates_meta(new_meta_path)
        old_meta_file = SortedFile(old_meta_path, sort_metadata_file,
                                   reader=JsonLinesReader)
        new_meta_file = SortedFile(new_meta_path, sort_metadata_file,
                                   reader=JsonLinesReader)
        new_meta_merged_file = open(new_meta_merged_path, 'w')

        # First pass: merge old metadata into the new snapshot, marking
        # entries that only exist in the old file with _status = 'old'.
        def format_meta_merge(result):
            if result.new_element:
                return json.dumps(result.new_element)
            else:
                r = copy.copy(result.old_element)
                r['_status'] = 'old'
                return json.dumps(r)

        meta_merged_export = Exporter(output_file=new_meta_merged_file,
                                      format_func=format_meta_merge)
        merge_function = JoinFunction(MetadataChange,
                                      [MergeReviews(), meta_merged_export],
                                      settings=self.settings)
        meta_join = SortedJoin(old_meta_file, new_meta_file)
        meta_join.full_join(
            merge_function,
            lambda x, y: locale.strcoll(x['identifier'], y['identifier']))
        new_meta_file.close()
        new_meta_merged_file.close()
        shutil.move(new_meta_merged_path, new_meta_path)

        # Second pass: diff old vs merged metadata, exporting only updates.
        old_meta_file = SortedFile(old_meta_path, sort_metadata_file,
                                   reader=JsonLinesReader)
        new_meta_file = SortedFile(new_meta_path, sort_metadata_file,
                                   reader=JsonLinesReader)
        meta_join = SortedJoin(old_meta_file, new_meta_file)
        meta_changes_file = open(meta_changes_path, 'w')
        meta_changes_exporter = Exporter(
            output_file=meta_changes_file,
            accept_codes=[Change.UPDATE],
            format_func=lambda result: result.format_json())
        meta_validators = [
            cls(errors_exporter, settings=self.settings)
            for cls in self.metadata_changes_validators
        ]
        meta_stats = MetadataChangesStats()
        meta_function = JoinFunction(
            MetadataChange,
            [meta_changes_exporter, meta_stats] + meta_validators,
            settings=self.settings)
        meta_join.full_join(
            meta_function,
            lambda x, y: locale.strcoll(x['identifier'], y['identifier']))

    # Collect stats and run the aggregate validators over them.
    stats = changes_stats.stats.copy()
    matched_deletions, matched = self._compute_matched_deletions(
        folder_path, all_products_path, product_changes_path)
    stats['matched_deletions'] = matched_deletions
    stats['matched'] = matched
    stats.update(additional_stats.stats)
    if meta_stats:
        stats.update(meta_stats.stats)
        stats['old_meta_dups_count'] = old_meta_dups_count
        stats['new_meta_dups_count'] = new_meta_dups_count

    change_val = ChangesValidator(settings=self.settings)
    additional_val = AdditionalChangesValidator(settings=self.settings)
    meta_val = MetadataChangesValidator(settings=self.settings)
    for error in change_val.validate(stats):
        errors_exporter.export(error)
    for error in additional_val.validate(stats):
        errors_exporter.export(error)
    if meta_stats:
        for error in meta_val.validate(stats):
            errors_exporter.export(error)

    if not self.settings.get('ignore_identifier_changes'):
        self._detect_identifier_changes(folder_path, all_products_path,
                                        new_products_path, additions_path,
                                        errors_exporter,
                                        identifier_changes_path)
    stats['errors_found'] = errors_exporter.exported_lines > 0
    stats['old_dups_count'] = old_dups_count
    stats['new_dups_count'] = new_dups_count
    return stats
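# The calls above (Exporter(output_file=..., header=..., format_func=...,
# accept_codes=...), errors_exporter.export(...), .exported_lines) suggest a
# small write-through helper. A minimal sketch of that shape, as an
# assumption for illustration only; the real Exporter class may differ:
class SketchExporter:
    def __init__(self, output_file, header=None, format_func=str,
                 accept_codes=None):
        self.output_file = output_file
        self.format_func = format_func
        self.accept_codes = accept_codes
        self.exported_lines = 0
        if header:
            output_file.write(header + '\n')

    def export(self, result):
        # If a code filter is set, skip results whose change code is not
        # accepted (assumes results carry a `code` attribute).
        if self.accept_codes is not None and \
                getattr(result, 'code', None) not in self.accept_codes:
            return
        self.output_file.write(self.format_func(result) + '\n')
        self.exported_lines += 1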
import sys
import os
# from multiprocessing import Pool

from context import Context
from export import Exporter

# program file dir
# mill line tag as string
line = sys.argv[1]
# dir definition: the %cd% passed on the command line is the root dir
root_dir = sys.argv[2].replace("\\", "/")
print(root_dir)
# context init
ctx = Context(root_dir)
# exporter
exporter = Exporter(ctx)
exporter.pond_export()

# if __name__ == '__main__':
#     print('Parent process {}.'.format(os.getpid()))
#     p = Pool(4)
#     p.apply_async(exporter.pond_export, args=("pond",))
#     print('Waiting for all subprocesses done...')
#     p.close()
#     p.join()
#     print('All subprocesses done.')
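# Assumed invocation, inferred from the sys.argv usage above (the script
# name is a placeholder):
#   python pond_export.py <mill_line_tag> %cd%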