def main(source_, citylist_, period_):
    """Crawl the given sources (optionally restricted to a city list),
    retry errored cities, archive results, then optionally repeat.

    source_   -- comma-separated source names ("" / None disables crawling)
    citylist_ -- comma-separated city names ("" / None means all cities)
    period_   -- hours between crawl rounds; falsy means run once
    """
    global error_cities
    global error_messages
    source = source_
    citylist = citylist_
    period = period_
    if source:
        source = source.split(",")
        print("source: ", source)
    if citylist:
        citylist = citylist.split(",")
        print("city list: ", citylist)
    while True:
        if source:
            app = AsyncPipeline()
            app.start(source, citylist)
        else:
            break

        # rescue: re-run any cities that errored, clearing their error
        # state first so this round's failures are recorded cleanly
        if len(error_cities) > 0:
            # BUG FIX: dict.iterkeys() no longer exists in Python 3;
            # iterate the dict directly (only values are reassigned, so
            # mutating during iteration is safe)
            for src in error_cities:
                print("Try to rescue", src)
                remain_cities = error_cities[src]
                error_cities[src] = []
                error_messages[src] = []
                app = AsyncPipeline()
                app.start([src], remain_cities)

        # archive first
        archiver = Archiver()
        for src in source:
            archiver.archive(src, src, True)  # False archives locally, True archives to S3

        # repeat only when a period is configured
        if not period:
            break
        time.sleep(int(period) * 3600)
        error_cities = {}
        error_messages = {}

        # re-read config between rounds for an updated period / stop flag
        stop_crawl = 0
        check_config = CheckConfig()
        config = check_config.check('crawl_config')
        for src in source:
            if src in config:
                if "period" in config[src]:
                    period = config[src]["period"]
                if "stop" in config[src]:
                    stop_crawl = config[src]["stop"]
                break
        if stop_crawl == 1:
            break
def destalinate_job():
    """Run one full destalinator pass: warn, archive, announce, flag.

    Requires both SB_TOKEN and API_TOKEN in the environment; when either
    is missing, only logs an error line and exits.
    """
    print("Destalinating")
    if "SB_TOKEN" not in os.environ or "API_TOKEN" not in os.environ:
        # BUG FIX: was a Python 2 print statement — a syntax error under
        # Python 3, inconsistent with the print() calls around it
        print("ERR: Missing at least one Slack environment variable.")
    else:
        warner = Warner()
        archiver = Archiver()
        announcer = Announcer()
        flagger = Flagger()
        print("Warning")
        warner.warn()
        print("Archiving")
        archiver.archive()
        print("Announcing")
        announcer.announce()
        print("Flagging")
        flagger.flag()
        print("OK: destalinated")
    print("END: destalinate_job")
def deploy(slug, testing_url, production_url, theme_url, production_server, production_dir):
    """Compile, package, and ship one site to production.

    Builds the site under SETTINGS.BUILD_DIR/<slug>, packages the build
    into SETTINGS.ARCHIVE_DIR/<slug>, then pushes the package to the
    production server. Always returns True.
    """
    build_path = os.path.join(SETTINGS.BUILD_DIR, slug)
    archive_path = os.path.join(SETTINGS.ARCHIVE_DIR, slug)

    # 1) compile the site into the build directory
    Compiler(build_path, testing_url, production_url, theme_url).compile()

    # 2) package the compiled build
    bundle = Archiver(slug, build_path, archive_path).archive()

    # 3) push the package out over SSH
    Deployer(production_server, SETTINGS.SSH_KEY, archive_path, production_dir).deploy(bundle)

    return True
class Snapper:
    """Grabs snapshots from one IP camera and stores/archives them."""

    def __init__(self, cam_config, storage):
        self.storage = storage
        self.cam = IpCamSnapper(cam_config)
        self.writer = FileDao()
        self.archiver = Archiver(self.storage, self.cam)
        self.store_in = self.storage.get_cam_snap_path(self.cam.name)

    def snap(self):
        """Take one snapshot and write it into the camera's snap directory.

        A failed capture is reported on stdout and the write is skipped.
        """
        image = None
        try:
            image = self.cam.snap()
        except Exception as err:  # TODO: log exception
            print(err.args)
            print("exception in snap")
        if not image:
            return
        filename = image.name + os.extsep + self.cam.ext
        self.writer.set_directory(self.store_in).write(filename, image.content)

    def archive(self):
        """Delegate archiving of stored snaps to the configured archiver."""
        self.archiver.archive()
def main(source_, citylist_, period_):
    """Crawl the requested group-buy sources, archive results, and
    optionally repeat every `period_` hours.

    source_   -- comma-separated source names ("" / None disables crawling)
    citylist_ -- comma-separated city names; when given, only those cities
                 are (re)crawled via rescue()
    period_   -- hours between rounds; falsy means run once
    """
    source = source_
    citylist = citylist_
    period = period_
    if source:
        source = source.split(",")
        print("source: ", source)
    if citylist:
        citylist = citylist.split(",")
        print("city list: ", citylist)

    # One factory per supported source. Lambdas defer construction until a
    # source is actually requested, matching the original lazy behavior.
    # (Replaces nine copy-pasted per-source blocks.)
    pipeline_factories = [
        ("meituan", lambda: Pipeline(MeituanCrawler(), MeituanParser(), None, "meituan")),
        ("nuomi", lambda: Pipeline(NuomiCrawler(), NuomiParser(), None, "nuomi")),
        ("lashou", lambda: Pipeline(LashouCrawler(), LashouParser(), None, "lashou")),
        ("wowo", lambda: Pipeline(WowoCrawler(), WowoParser(), None, "wowo")),
        ("dida", lambda: Pipeline(DidaCrawler(), DidaParser(), None, "dida")),
        ("dianping", lambda: Pipeline(DianpingCrawler(), DianpingParser(), None, "dianping")),
        ("manzuo", lambda: Pipeline(ManzuoCrawler(), ManzuoParser(), None, "manzuo")),
        ("ftuan", lambda: Pipeline(FtuanCrawler(), FtuanParser(), None, "ftuan")),
        ("wuba", lambda: Pipeline(WubaCrawler(), WubaParser(), None, "wuba")),
    ]

    while True:
        if not source:
            break
        sleep_interval = (0, 30)

        for name, make_pipeline in pipeline_factories:
            if name not in source:
                continue
            app = make_pipeline()
            if not citylist:
                # full crawl; rescue whatever cities errored
                error = app.start(sleep_interval)
                if len(error) > 0:
                    app.rescue(error, sleep_interval)
            else:
                # explicit city list: crawl only those via rescue()
                app.rescue(citylist, sleep_interval)

        # archive first
        archiver = Archiver()
        for src in source:
            archiver.archive(src, src, True)  # False archives locally, True archives to S3

        # repeat only when a period is configured
        if not period:
            break
        time.sleep(int(period) * 3600)

        # re-read config between rounds for an updated period / stop flag
        stop_crawl = 0
        check_config = CheckConfig()
        config = check_config.check('crawl_config')
        for src in source:
            if src in config:
                if "period" in config[src]:
                    period = config[src]["period"]
                if "stop" in config[src]:
                    stop_crawl = config[src]["stop"]
                break
        if stop_crawl == 1:
            break
def parse(self):
    """Parse the extracted invoices JSON into a flat invoices CSV.

    Reads processing/default/invoices.json, joins in branding-theme names
    from processing/default/branding-themes.csv, writes one quoted CSV row
    per invoice, then archives the JSON/CSV and copies the CSV to the
    master and (excelized) current areas.

    Returns False when the invoices JSON is missing; otherwise None.
    """
    data_type = "invoices"
    xero_url_accrec = "https://go.xero.com/AccountsReceivable/View.aspx?InvoiceID="
    xero_url_accpay = "https://go.xero.com/AccountsPayable/View.aspx?InvoiceID="
    proc_dir = "processing/default"
    json_file_name = "{}/{}/{}.json".format(self.data_dir, proc_dir, data_type)
    csv_file_name = "{}/{}/{}.csv".format(self.data_dir, proc_dir, data_type)

    if not os.path.isfile(json_file_name):
        self.log.write("ERROR {} file does not exist, did you forget to extract this?".format(json_file_name))
        return False

    with open(json_file_name, encoding='utf-8') as f:
        data = json.load(f)

    collection = 'Invoices'
    if collection not in data:
        self.log.write("ERROR '{}' collection not found in JSON file".format(collection))
        return

    # zero ts in json header (unused below; kept for parity with the
    # other parsers and to fail fast on a malformed header)
    zero_created_datetime = clean_date(data['DateTimeUTC'])

    # whitespace is stripped before writing, so layout here is free
    col_header = """
    InvoiceID,Type,InvoiceNumber,Reference,
    AmountDue,AmountPaid,AmountCredited,CurrencyRate,IsDiscounted,HasAttachments,HasErrors,
    ContactID,Name,Date,BrandingThemeID,BrandingThemeName,Status,LineAmountTypes,
    SubTotal,TotalTax,Total,UpdatedDateUTC,CurrencyCode,ProcessingNotes,
    URL
    """

    # read in branding themes (BrandingThemeID -> Name). FIX: checked
    # before the output CSV is created, so an early return no longer
    # leaves a truncated CSV and a leaked file handle behind.
    themes_csv_file = self.data_dir + "/processing/default/branding-themes.csv"
    if not os.path.isfile(themes_csv_file):
        self.log.write("ERROR {} file does not exist, did you forget to extract this?".format(themes_csv_file))
        return
    themes = {}
    with open(themes_csv_file, "r") as f:
        reader = csv.reader(f, delimiter=",")
        for j, line in enumerate(reader):
            if j > 0:  # skip header row
                themes[line[0]] = line[1]

    i = 0
    with open(csv_file_name, 'w', encoding='utf-8') as csv_file:
        csv_file.write(re.sub(r"[\n\t\s]", "", col_header) + "\n")
        for invoice in data['Invoices']:
            i = i + 1
            inv_type = invoice['Type'] if 'Type' in invoice else ''  # renamed: 'type' shadowed builtin
            invoice_id = invoice['InvoiceID']
            invoice_number = invoice['InvoiceNumber'] if 'InvoiceNumber' in invoice else ''
            reference = invoice['Reference'] if 'Reference' in invoice else ''
            amount_due = invoice['AmountDue'] if 'AmountDue' in invoice else 0.00
            amount_paid = invoice['AmountPaid'] if 'AmountPaid' in invoice else 0.00
            amount_credited = invoice['AmountCredited'] if 'AmountCredited' in invoice else 0.00
            currency_rate = invoice['CurrencyRate'] if 'CurrencyRate' in invoice else 0.00
            is_discounted = invoice['IsDiscounted'] if 'IsDiscounted' in invoice else ''
            has_attachments = invoice['HasAttachments'] if 'HasAttachments' in invoice else ''
            has_errors = invoice['HasErrors'] if 'HasErrors' in invoice else ''
            if 'Contact' in invoice and invoice['Contact']:
                contact = invoice['Contact']
                contact_id = contact['ContactID']
                name = contact['Name'] if 'Name' in contact else ''
            else:
                contact = ""
                contact_id = ""
                name = ""
            # use DateString
            date = (invoice['DateString'])[:10] if 'DateString' in invoice else ''
            branding_theme_id = invoice['BrandingThemeID'] if 'BrandingThemeID' in invoice else ''
            status = invoice['Status'] if 'Status' in invoice else ''
            line_amount_types = invoice['LineAmountTypes'] if 'LineAmountTypes' in invoice else ''
            sub_total = invoice['SubTotal'] if 'SubTotal' in invoice else ''
            total_tax = invoice['TotalTax'] if 'TotalTax' in invoice else ''
            total = invoice['Total'] if 'Total' in invoice else ''
            updated_date_utc = clean_date(invoice['UpdatedDateUTC']) if 'UpdatedDateUTC' in invoice else ''
            currency_code = invoice['CurrencyCode'] if 'CurrencyCode' in invoice else ''

            if inv_type == "ACCPAY":
                url = xero_url_accpay + invoice_id
            elif inv_type == "ACCREC":
                url = xero_url_accrec + invoice_id
            else:
                url = ""

            # get branding theme name
            processing_notes = ""
            if branding_theme_id in themes:
                branding_theme_name = themes[branding_theme_id]
            else:
                branding_theme_name = ""
                # BUG FIX: was assigned to 'processing_note', so the note
                # never reached the CSV
                processing_notes = "branding theme id not found"

            columns = [
                invoice_id, inv_type, invoice_number, reference, amount_due,
                amount_paid, amount_credited, currency_rate, is_discounted,
                has_attachments, has_errors, contact_id, name, date,
                branding_theme_id, branding_theme_name, status,
                line_amount_types, sub_total, total_tax, total,
                updated_date_utc, currency_code, processing_notes, url
            ]
            prep_columns = list(map(lambda col: "\"" + str(col) + "\"", columns))
            csv_file.write(",".join(prep_columns) + "\n")

    self.log.write("INFO [{}] CSV file created {} ({:,} records)".format(data_type, csv_file_name, i))

    # per-column Excel formats used when the 'current' copy is excelized
    formats = [
        '', '', '', '', '0.00', '0.00', '0.00', '0.00', '', '', '', '', '',
        'short', '', '', '', '', '0.00', '0.00', '0.00', 'long', '', '', ''
    ]
    ark = Archiver(self.log)
    ark.archive(data_type, json_file_name)
    ark.archive(data_type, csv_file_name)
    ark.copy(data_type, csv_file_name, 'master')
    ark.copy(data_type, csv_file_name, 'current', excelize=True, xlsx_formats=formats)
#! /usr/bin/env python from warner import Warner from archiver import Archiver if __name__ == "__main__": warner = Warner() archiver = Archiver() warner.warn() archiver.archive()
def parse(self):
    """Parse the extracted branding-themes JSON into a CSV lookup file.

    Reads processing/default/branding-themes.json, writes one quoted CSV
    row per theme, and archives the source JSON. (Unlike the other
    parsers, the CSV itself is not archived or copied here.)

    Returns False when the JSON input is missing; otherwise None.
    """
    data_type = "branding-themes"
    proc_dir = "processing/default"
    json_file_name = "{}/{}/{}.json".format(self.data_dir, proc_dir, data_type)
    csv_file_name = "{}/{}/{}.csv".format(self.data_dir, proc_dir, data_type)
    if not os.path.isfile(json_file_name):
        self.log.write(
            "ERROR {} file does not exist, did you forget to extract this?"
            .format(json_file_name))
        return False

    with open(json_file_name, encoding='utf-8') as f:
        data = json.load(f)

    collection = 'BrandingThemes'
    if collection not in data:
        self.log.write(
            "ERROR '{}' collection not found in JSON file".format(
                collection))
        return

    # zero ts in json header (unused below; kept for parity with the
    # other parsers and to fail fast on a malformed header)
    zero_created_datetime = clean_date(data['DateTimeUTC'])

    col_header = "BrandingThemeID,Name,LogoUrl,Type,SortOrder,CreatedDateUTC"

    i = 0
    # FIX: 'with' guarantees the CSV handle is closed even on error
    with open(csv_file_name, 'w', encoding='utf-8') as csv_file:
        csv_file.write(re.sub(r'\n', '', col_header) + "\n")
        for theme in data['BrandingThemes']:
            i = i + 1
            # renamed locals: 'id' and 'type' shadowed builtins
            theme_id = theme['BrandingThemeID']
            name = theme['Name']
            url = theme.get('LogoUrl', '')
            theme_type = theme.get('Type', '')
            sort_order = theme.get('SortOrder', '')
            created_date = clean_date(theme['CreatedDateUTC'])
            columns = [theme_id, name, url, theme_type, sort_order, created_date]
            prep_columns = ["\"" + str(col) + "\"" for col in columns]
            csv_file.write(",".join(prep_columns) + "\n")

    self.log.write("INFO [{}] CSV file created {} ({:,} records)".format(
        data_type, csv_file_name, i))

    ark = Archiver(self.log)
    ark.archive(data_type, json_file_name)
def parse(self):
    """Parse the extracted items JSON into a flat items CSV.

    Reads processing/default/items.json, flattens PurchaseDetails /
    SalesDetails, splits the pipe-delimited Code and (Purchase)Description
    overloads into their own columns, writes one quoted CSV row per item,
    then archives the JSON/CSV and copies the CSV to master and current.

    Returns False when the items JSON is missing; otherwise None.
    """
    data_type = "items"
    xero_url = "https://go.xero.com/Accounts/Inventory/"
    proc_dir = "processing/default"
    json_file_name = "{}/{}/{}.json".format(self.data_dir, proc_dir, data_type)
    csv_file_name = "{}/{}/{}.csv".format(self.data_dir, proc_dir, data_type)
    if not os.path.isfile(json_file_name):
        self.log.write("ERROR {} file does not exist, did you forget to extract this?".format(json_file_name))
        return False

    with open(json_file_name, encoding='utf-8') as f:
        data = json.load(f)

    collection = 'Items'
    if collection not in data:
        self.log.write("ERROR '{}' collection not found in JSON file".format(collection))
        return

    # zero ts in json header (unused below; kept for parity with the
    # other parsers and to fail fast on a malformed header)
    zero_created_datetime = clean_date(data['DateTimeUTC'])

    # whitespace is stripped before writing, so layout here is free
    col_header = """
    ItemID,Code,Code2,Description,PurchaseDescription,UpdatedDateUTC,PurchasedUnitPrice,PurchasedCOGSAccountCode,
    PurchasedTaxType,SalesUnitPrice,SalesAccountCode,SalesTaxType,Name,IsTrackedAsInventory,
    InventoryAssetAccountCode,TotalCostPool,QuantityOnHand,IsSold,IsPurchased,
    SupplierCode,ProductDescription,ProductSegment1,ProductSegment2,
    PurchaseSupplierCode,PurchaseProductDescription,PurchaseProductSegment1,PurchaseProductSegment2,
    ProcessingNotes,
    URL
    """

    i = 0
    with open(csv_file_name, 'w', encoding='utf-8') as csv_file:
        csv_file.write(re.sub(r"[\n\t\s]", "", col_header) + "\n")
        for item in data[collection]:
            i = i + 1
            item_id = item['ItemID']
            code = item['Code']
            description = item['Description']
            purchase_description = item['PurchaseDescription'] if 'PurchaseDescription' in item else ''
            updated_date_utc = clean_date(item['UpdatedDateUTC']) if 'UpdatedDateUTC' in item else ''

            if 'PurchaseDetails' in item and item['PurchaseDetails']:
                details = item['PurchaseDetails']
                purchase_unit_price = details['UnitPrice'] if 'UnitPrice' in details else ''
                purchase_cogs_account_code = details['COGSAccountCode'] if 'COGSAccountCode' in details else ''
                purchase_tax_type = details['TaxType'] if 'TaxType' in details else ''
            else:
                purchase_unit_price = ''
                purchase_cogs_account_code = ''
                purchase_tax_type = ''

            if 'SalesDetails' in item and item['SalesDetails']:
                details = item['SalesDetails']
                sales_unit_price = details['UnitPrice'] if 'UnitPrice' in details else ''
                sales_account_code = details['AccountCode'] if 'AccountCode' in details else ''
                sales_tax_type = details['TaxType'] if 'TaxType' in details else ''
            else:
                sales_unit_price = ''
                sales_account_code = ''
                sales_tax_type = ''

            name = item['Name']
            # NOTE(review): these defaults are a single space (' '), not ''
            # as elsewhere — preserved as-is since downstream CSVs may
            # depend on it, but it looks like a typo; confirm before fixing.
            is_tracked_as_inventory = item['IsTrackedAsInventory'] if 'IsTrackedAsInventory' in item else ' '
            inventory_asset_account_code = item['InventoryAssetAccountCode'] if 'InventoryAssetAccountCode' in item else ' '
            total_cost_pool = item['TotalCostPool'] if 'TotalCostPool' in item else ' '
            quantity_on_hand = item['QuantityOnHand'] if 'QuantityOnHand' in item else ' '
            is_sold = item['IsSold'] if 'IsSold' in item else ' '
            is_purchased = item['IsPurchased'] if 'IsPurchased' in item else ' '

            # some codes have an additional code piped on to them
            code2 = ""
            parts = code.split("|")
            if len(parts) == 2:
                code = parts[0].strip()
                code2 = parts[1].strip()

            processing_notes = ""
            supplier_code = ""
            product_description = ""
            product_segment_1 = ""
            product_segment_2 = ""
            purchase_supplier_code = ""
            purchase_product_description = ""
            purchase_product_segment_1 = ""
            purchase_product_segment_2 = ""

            # parse desc's for supplier code, desc, product segment 1, product segment 2
            parts = description.split("|")
            if len(parts) != 4:
                processing_notes = "malformed [Description] field"
            else:
                supplier_code = parts[0].strip()
                product_description = parts[1].strip()
                product_segment_1 = parts[2].strip()
                product_segment_2 = parts[3].strip()

            parts = purchase_description.split("|")
            if len(parts) != 4:
                if not processing_notes:
                    # BUG FIX: was assigned to 'ProcessingNotes' (wrong
                    # name), so this note never reached the CSV
                    processing_notes = "malformed [PurchaseDescription] field"
                else:
                    processing_notes = processing_notes + "/" + "malformed [PurchaseDescription] field"
            else:
                purchase_supplier_code = parts[0].strip()
                purchase_product_description = parts[1].strip()
                purchase_product_segment_1 = parts[2].strip()
                purchase_product_segment_2 = parts[3].strip()

            url = xero_url + item_id

            columns = [
                item_id, code, code2, description, purchase_description,
                updated_date_utc, purchase_unit_price,
                purchase_cogs_account_code, purchase_tax_type,
                sales_unit_price, sales_account_code, sales_tax_type, name,
                is_tracked_as_inventory, inventory_asset_account_code,
                total_cost_pool, quantity_on_hand, is_sold, is_purchased,
                supplier_code, product_description, product_segment_1,
                product_segment_2, purchase_supplier_code,
                purchase_product_description, purchase_product_segment_1,
                purchase_product_segment_2, processing_notes, url
            ]
            prep_columns = list(map(lambda col: "\"" + str(col) + "\"", columns))
            csv_file.write(",".join(prep_columns) + "\n")

    self.log.write("INFO [{}] CSV file created {} ({:,} records)".format(data_type, csv_file_name, i))

    # per-column Excel formats used when the 'current' copy is excelized
    formats = [
        '', '', '', '', '', 'long', '0.00', '', '', '0.00', '', '', '', '',
        '', '0.00', '0.00', '', '', '', '', '', '', '', '', '', '', '', ''
    ]
    ark = Archiver(self.log)
    ark.archive(data_type, json_file_name)
    ark.archive(data_type, csv_file_name)
    ark.copy(data_type, csv_file_name, 'master')
    ark.copy(data_type, csv_file_name, 'current', excelize=True, xlsx_formats=formats)
def parse(self):
    """Parse per-invoice delta JSON files into header and line-item CSVs.

    Each *.json under processing/invoices holds a single invoice (as a
    one-element 'Invoices' collection). Writes one header row per invoice
    and one detail row per line item, then archives the inputs and both
    delta CSVs.

    Returns False when no JSON files are found; otherwise None.
    """
    h_data_type = "invoice-headers"
    d_data_type = "invoice-line-items"
    proc_dir = "processing/invoices"
    json_dir = "{}/{}".format(self.data_dir, proc_dir)
    json_files = [f for f in os.listdir(json_dir) if f.endswith('.json')]
    if not json_files:
        self.log.write(
            "ERROR {}/*.json files do not exist, did you forget to extract this?"
            .format(json_dir))
        return False

    xero_url_accrec = "https://go.xero.com/AccountsReceivable/View.aspx?InvoiceID="
    xero_url_accpay = "https://go.xero.com/AccountsPayable/View.aspx?InvoiceID="
    csv_header_file_name = "{}/{}-delta.csv".format(json_dir, h_data_type)
    csv_detail_file_name = "{}/{}-delta.csv".format(json_dir, d_data_type)

    # whitespace is stripped before writing, so layout here is free
    header_col_header = """
    Type,InvoiceID,InvoiceNumber,Reference,
    AmountDue,AmountPaid,AmountCredited,CurrencyRate,IsDiscounted,HasAttachments,HasErrors,
    ContactID,Name,Date,BrandingThemeID,BrandingThemeName,Status,LineAmountTypes,
    SubTotal,TotalTax,Total,UpdatedDateUTC,CurrencyCode,ProcessingNotes,
    URL
    """
    detail_col_header = """
    InvoiceID,InvoiceNumber,Type,Date,LineItemNumber,
    LineItemID,ItemCode,ItemCode2,Description,UnitAmount,TaxAmount,LineAmount,AccountCode,Quantity
    """

    # read in branding themes (BrandingThemeID -> Name). FIX: checked
    # before the output CSVs are created, so an early return no longer
    # leaves truncated CSVs and leaked file handles behind.
    themes_csv_file = self.data_dir + "/processing/default/branding-themes.csv"
    if not os.path.isfile(themes_csv_file):
        # BUG FIX: the '{}' placeholder was never substituted
        msg = "ERROR {} file does not exist, did you forget to extract this?".format(themes_csv_file)
        print(msg)
        self.log.write(msg)
        return
    themes = {}
    with open(themes_csv_file, "r") as f:
        reader = csv.reader(f, delimiter=",")
        for j, line in enumerate(reader):
            if j > 0:  # skip header row
                themes[line[0]] = line[1]

    i = 0  # invoices (files) processed
    j = 0  # line items written
    with open(csv_header_file_name, 'w', encoding='utf-8') as csv_header_file, \
            open(csv_detail_file_name, 'w', encoding='utf-8') as csv_detail_file:
        csv_header_file.write(re.sub(r"[\n\t\s]", "", header_col_header) + "\n")
        csv_detail_file.write(re.sub(r"[\n\t\s]", "", detail_col_header) + "\n")

        for file in json_files:
            i = i + 1
            self.log.write(
                "INFO [invoice-line-items] processing file {}".format(file))
            json_file_name = "{}/{}".format(json_dir, file)
            with open(json_file_name, encoding='utf-8') as f:
                data = json.load(f)
            collection = 'Invoices'
            if collection not in data:
                self.log.write(
                    "ERROR '{}' collection not found in JSON file {}".format(
                        collection, file))
                continue
            # zero ts in json header (unused; kept for parity and to fail
            # fast on a malformed header)
            zero_created_datetime = clean_date(data['DateTimeUTC'])

            # single invoice, but is part of a collection
            invoice = data['Invoices'][0]
            inv_type = invoice['Type'] if 'Type' in invoice else ''  # renamed: 'type' shadowed builtin
            invoice_id = invoice['InvoiceID']
            invoice_number = invoice['InvoiceNumber'] if 'InvoiceNumber' in invoice else ''
            reference = invoice['Reference'] if 'Reference' in invoice else ''
            amount_due = invoice['AmountDue'] if 'AmountDue' in invoice else 0.00
            amount_paid = invoice['AmountPaid'] if 'AmountPaid' in invoice else 0.00
            amount_credited = invoice['AmountCredited'] if 'AmountCredited' in invoice else 0.00
            currency_rate = invoice['CurrencyRate'] if 'CurrencyRate' in invoice else 0.00
            is_discounted = invoice['IsDiscounted'] if 'IsDiscounted' in invoice else ''
            has_attachments = invoice['HasAttachments'] if 'HasAttachments' in invoice else ''
            has_errors = invoice['HasErrors'] if 'HasErrors' in invoice else ''
            if 'Contact' in invoice and invoice['Contact']:
                contact = invoice['Contact']
                contact_id = contact['ContactID']
                name = contact['Name'] if 'Name' in contact else ''
            else:
                contact = ""
                contact_id = ""
                name = ""
            # use DateString
            date = (invoice['DateString'])[:10] if 'DateString' in invoice else ''
            branding_theme_id = invoice['BrandingThemeID'] if 'BrandingThemeID' in invoice else ''
            status = invoice['Status'] if 'Status' in invoice else ''
            line_amount_types = invoice['LineAmountTypes'] if 'LineAmountTypes' in invoice else ''
            sub_total = invoice['SubTotal'] if 'SubTotal' in invoice else ''
            total_tax = invoice['TotalTax'] if 'TotalTax' in invoice else ''
            total = invoice['Total'] if 'Total' in invoice else ''
            updated_date_utc = clean_date(invoice['UpdatedDateUTC']) if 'UpdatedDateUTC' in invoice else ''
            currency_code = invoice['CurrencyCode'] if 'CurrencyCode' in invoice else ''

            if inv_type == "ACCPAY":
                url = xero_url_accpay + invoice_id
            elif inv_type == "ACCREC":
                url = xero_url_accrec + invoice_id
            else:
                url = ""

            # get branding theme name
            processing_notes = ""
            if branding_theme_id in themes:
                branding_theme_name = themes[branding_theme_id]
            else:
                branding_theme_name = ""
                # BUG FIX: was assigned to 'processing_note', so the note
                # never reached the CSV
                processing_notes = "branding theme id not found"

            columns = [
                inv_type, invoice_id, invoice_number, reference, amount_due,
                amount_paid, amount_credited, currency_rate, is_discounted,
                has_attachments, has_errors, contact_id, name, date,
                branding_theme_id, branding_theme_name, status,
                line_amount_types, sub_total, total_tax, total,
                updated_date_utc, currency_code, processing_notes, url
            ]
            prep_columns = list(map(lambda col: "\"" + str(col) + "\"", columns))
            csv_header_file.write(",".join(prep_columns) + "\n")

            # process line items
            if 'LineItems' not in invoice:
                self.log.write(
                    "WARN no line items found for invoice {}".format(
                        invoice_id))
                continue
            line_items = invoice['LineItems']
            line_item_num = 0  # artificial li number
            for line_item in line_items:
                j = j + 1  # total line items
                line_item_num = line_item_num + 1
                li_lid = line_item['LineItemID']
                li_item_code = line_item['ItemCode'] if 'ItemCode' in line_item else ''
                li_desc = line_item['Description'] if 'Description' in line_item else ''
                li_unit_amt = line_item['UnitAmount'] if 'UnitAmount' in line_item else 0.00
                li_tax_amt = line_item['TaxAmount'] if 'TaxAmount' in line_item else 0.00
                li_line_amt = line_item['LineAmount'] if 'LineAmount' in line_item else 0.00
                li_acct_code = line_item['AccountCode'] if 'AccountCode' in line_item else ''
                li_qty = line_item['Quantity'] if 'Quantity' in line_item else 0.00
                # desc field allows cr's, strip here
                li_desc = li_desc.strip("\n")
                # some codes have an additional code piped on to them
                li_item_code2 = ""
                parts = li_item_code.split("|")
                if len(parts) == 2:
                    li_item_code = parts[0].strip()
                    li_item_code2 = parts[1].strip()
                columns = [
                    invoice_id, invoice_number, inv_type, date,
                    line_item_num, li_lid, li_item_code, li_item_code2,
                    li_desc, li_unit_amt, li_tax_amt, li_line_amt,
                    li_acct_code, li_qty
                ]
                prep_columns = list(map(lambda col: "\"" + str(col) + "\"", columns))
                csv_detail_file.write(",".join(prep_columns) + "\n")

    self.log.write("INFO [{}] CSV file created {} ({:,} records)".format(
        'invoice-headers', csv_header_file_name, i))
    # BUG FIX: the detail count is the line-item total (j), not the file
    # count (i) that was logged before
    self.log.write("INFO [{}] CSV file created {} ({:,} records)".format(
        'invoice-line-items', csv_detail_file_name, j))

    ark = Archiver(self.log)
    files = list(map(lambda file: "{}/{}".format(json_dir, file), json_files))
    ark.archive('invoices', files)
    ark.archive(h_data_type, csv_header_file_name)
    ark.archive(d_data_type, csv_detail_file_name)
def merge_invoice_delta(self):
    """Upsert the daily invoice delta CSVs into the master CSVs.

    For every invoice present in the header delta, any existing master
    header row and its related detail rows are dropped, then the delta
    frames are appended — i.e. delta rows replace master rows by
    InvoiceID. The merged masters are rewritten, logged, archived, and
    copied to the master and (excelized) current areas.
    """
    # master + (daily) delta files
    header_master_file = "{}/master/invoice-headers.csv".format(
        self.data_dir)
    header_delta_file = "{}/processing/invoices/invoice-headers-delta.csv".format(
        self.data_dir)
    detail_master_file = "{}/master/invoice-line-items.csv".format(
        self.data_dir)
    detail_delta_file = "{}/processing/invoices/invoice-line-items-delta.csv".format(
        self.data_dir)

    # load everything up front; headers are keyed by InvoiceID, details
    # are kept positional (InvoiceID is an ordinary column there)
    df_header_master = pd.read_csv(header_master_file, index_col='InvoiceID')
    df_header_delta = pd.read_csv(header_delta_file, index_col='InvoiceID')
    df_detail_master = pd.read_csv(detail_master_file)
    df_detail_delta = pd.read_csv(detail_delta_file)

    # pre-merge row counts, used for the inserted/updated arithmetic below
    hm_cnt = df_header_master.shape[0]
    hd_cnt = df_header_delta.shape[0]
    dm_cnt = df_detail_master.shape[0]
    dd_cnt = df_detail_delta.shape[0]

    h_del_cnt = 0
    d_del_cnt = 0
    # drop every master row that the delta is about to replace
    for inv_id, _row in df_header_delta.iterrows():
        if inv_id in df_header_master.index.values:
            h_del_cnt += 1
            df_header_master.drop(inv_id, inplace=True)
            related = df_detail_master[df_detail_master['InvoiceID'] == inv_id]
            d_del_cnt += related.shape[0]
            df_detail_master.drop(related.index, inplace=True)

    # masters (minus replaced rows) + deltas = upserted masters
    df_new_header_master = pd.concat([df_header_master, df_header_delta])
    df_new_detail_master = pd.concat([df_detail_master, df_detail_delta])

    new_header_master_file = "{}/processing/invoices/invoice-headers.csv".format(
        self.data_dir)
    new_detail_master_file = "{}/processing/invoices/invoice-line-items.csv".format(
        self.data_dir)
    if os.path.exists(new_header_master_file):
        os.remove(new_header_master_file)
    df_new_header_master.to_csv(new_header_master_file,
                                header=True,
                                index=True,
                                quoting=csv.QUOTE_ALL)
    if os.path.exists(new_detail_master_file):
        os.remove(new_detail_master_file)
    df_new_detail_master.to_csv(new_detail_master_file,
                                header=True,
                                index=False,
                                quoting=csv.QUOTE_ALL)

    self.log.write(
        "INFO [invoice-headers] {:,} invoice records inserted into header master"
        .format(df_new_header_master.shape[0] - hm_cnt))
    self.log.write(
        "INFO [invoice-headers] {:,} invoice records updated in header master"
        .format(hd_cnt - (df_new_header_master.shape[0] - hm_cnt)))
    self.log.write(
        "INFO [invoice-headers] master file written to {}".format(
            new_header_master_file))
    self.log.write(
        "INFO [invoice-details] {:,} invoice records inserted into detail master"
        .format(df_new_detail_master.shape[0] - dm_cnt))
    self.log.write(
        "INFO [invoice-details] {:,} invoice records updated in detail master"
        .format(dd_cnt - (df_new_detail_master.shape[0] - dm_cnt)))
    self.log.write(
        "INFO [invoice-details] master file written to {}".format(
            new_detail_master_file))

    ark = Archiver(self.log)
    ark.archive('invoice-headers', new_header_master_file)
    ark.archive('invoice-line-items', new_detail_master_file)
    ark.copy('invoice-headers', new_header_master_file, 'master')
    ark.copy('invoice-line-items', new_detail_master_file, 'master')
    formats = [
        '', '', '', '', '0.00', '0.00', '0.00', '0.00', '', '', '', '', '',
        'short', '', '', '', '', '0.00', '0.00', '0.00', 'long', '', '', ''
    ]
    ark.copy('invoice-headers',
             new_header_master_file,
             'current',
             excelize=True,
             xlsx_formats=formats)
    formats = [
        '', '', '', 'short', '0', '', '', '', '', '0.00', '0.00', '0.00',
        '', '0.00'
    ]
    ark.copy('invoice-line-items',
             new_detail_master_file,
             'current',
             excelize=True,
             xlsx_formats=formats)
def parse(self):
    """Parse the extracted contacts JSON into a flat contacts CSV.

    Reads processing/default/contacts.json, flattens the nested Addresses
    (STREET/POBOX) and Phones (DEFAULT/MOBILE/FAX/DDI) collections into
    fixed columns, splits the pipe-delimited SkypeUserName overload into
    three segments, writes one quoted CSV row per contact, then archives
    the JSON/CSV and copies the CSV to master and current.

    Returns False when the contacts JSON is missing; otherwise None.
    """
    data_type = "contacts"
    xero_url = "https://go.xero.com/Contacts/View/"
    proc_dir = "processing/default"
    json_file_name = "{}/{}/{}.json".format(self.data_dir, proc_dir, data_type)
    csv_file_name = "{}/{}/{}.csv".format(self.data_dir, proc_dir, data_type)
    if not os.path.isfile(json_file_name):
        self.log.write(
            "ERROR {} file does not exist, did you forget to extract this?"
            .format(json_file_name))
        return False

    with open(json_file_name, encoding='utf-8') as f:
        data = json.load(f)

    collection = 'Contacts'
    if collection not in data:
        self.log.write(
            "ERROR '{}' collection not found in JSON file".format(
                collection))
        return

    # whitespace is stripped before writing, so layout here is free
    col_header = """
    ContactID,AccountNumber,ContactStatus,Name,FirstName,LastName,EmailAddress,SkypeUserName,Segment1,Segment2,Segment3,BankAccountDetails,TaxNumber,
    Street_City,Street_Region,Street_PostalCode,Street_Country,
    POBOX_AddressLine1,POBOX_AddressLine2,POBOX_AddressLine3,POBOX_AddressLine4,POBOX_City,POBOX_Region,POBOX_PostalCode,POBOX_Country,POBOX_AttentionTo,
    DEFAULT_PhoneNumber,DEFAULT_PhoneAreaCode,DEFAULT_PhoneCountryCode,
    MOBILE_PhoneNumber,MOBILE_PhoneAreaCode,MOBILE_PhoneCountryCode,
    FAX_PhoneNumber,FAX_PhoneAreaCode,FAX_PhoneCountryCode,
    DDI_PhoneNumber,DDI_PhoneAreaCode,DDI_PhoneCountryCode,
    UpdatedDateUTC,IsSupplier,IsCustomer,
    ProcessingNotes,URL
    """

    i = 0
    # FIX: 'with' guarantees the CSV handle is closed even on error
    with open(csv_file_name, 'w', encoding='utf-8') as csv_file:
        csv_file.write(re.sub(r"[\n\t\s]", "", col_header) + "\n")
        for contact in data[collection]:
            i = i + 1
            contact_id = contact['ContactID']
            account_number = contact.get('AccountNumber', '')
            contact_status = contact.get('ContactStatus', '')
            name = contact.get('Name', '')
            first_name = contact.get('FirstName', '')
            last_name = contact.get('LastName', '')
            email_address = contact.get('EmailAddress', '')

            # SkypeUserName is overloaded with "segment1|segment2|segment3"
            skype_user_name = contact.get('SkypeUserName', '')
            parts = skype_user_name.split("|")
            if len(parts) == 3:
                segment_1 = parts[0].strip()
                segment_2 = parts[1].strip()
                segment_3 = parts[2].strip()
                processing_notes = ""
            else:
                segment_1 = ""
                segment_2 = ""
                segment_3 = ""
                processing_notes = "malformed [SkypeUserName] field"

            bank_account_details = contact.get('BankAccountDetails', '')
            tax_number = zero_to_empty(
                contact['TaxNumber']) if 'TaxNumber' in contact else ''

            # address defaults, overwritten by matching address types below
            street_city = street_region = street_postalcode = street_country = ""
            pobox_addressline1 = pobox_addressline2 = pobox_address_line3 = pobox_address_line4 = ""
            pobox_city = pobox_region = pobox_postal_code = pobox_country = pobox_attention_to = ""
            if 'Addresses' in contact and contact['Addresses']:
                for address in contact['Addresses']:
                    if address['AddressType'] == 'STREET':
                        street_city = zero_to_empty(
                            address['City']) if 'City' in address else ''
                        street_region = zero_to_empty(
                            address['Region']) if 'Region' in address else ''
                        street_postalcode = zero_to_empty(
                            address['PostalCode']
                        ) if 'PostalCode' in address else ''
                        street_country = zero_to_empty(
                            address['Country']) if 'Country' in address else ''
                    elif address['AddressType'] == 'POBOX':
                        pobox_addressline1 = zero_to_empty(
                            address['AddressLine1']
                        ) if 'AddressLine1' in address else ''
                        pobox_addressline2 = zero_to_empty(
                            address['AddressLine2']
                        ) if 'AddressLine2' in address else ''
                        pobox_address_line3 = zero_to_empty(
                            address['AddressLine3']
                        ) if 'AddressLine3' in address else ''
                        pobox_address_line4 = zero_to_empty(
                            address['AddressLine4']
                        ) if 'AddressLine4' in address else ''
                        pobox_city = zero_to_empty(
                            address['City']) if 'City' in address else ''
                        pobox_region = zero_to_empty(
                            address['Region']) if 'Region' in address else ''
                        pobox_postal_code = zero_to_empty(
                            address['PostalCode']
                        ) if 'PostalCode' in address else ''
                        pobox_country = zero_to_empty(
                            address['Country']) if 'Country' in address else ''
                        pobox_attention_to = zero_to_empty(
                            address['AttentionTo']
                        ) if 'AttentionTo' in address else ''
                    else:
                        # TODO : other type of address (write note to log)
                        pass

            # phone defaults, overwritten by matching phone types below
            ddi_phone_number = ddi_phone_area_code = ddi_phone_country_code = ""
            default_phone_number = default_phone_area_code = default_phone_country_code = ""
            fax_phone_number = fax_phone_area_code = fax_phone_country_code = ""
            mobile_phone_number = mobile_phone_area_code = mobile_phone_country_code = ""
            if 'Phones' in contact and contact['Phones']:
                for phone in contact['Phones']:
                    if phone['PhoneType'] == 'DDI':
                        ddi_phone_number = zero_to_empty(
                            phone['PhoneNumber']
                        ) if 'PhoneNumber' in phone else ''
                        ddi_phone_area_code = zero_to_empty(
                            phone['PhoneAreaCode']
                        ) if 'PhoneAreaCode' in phone else ''
                        ddi_phone_country_code = zero_to_empty(
                            phone['PhoneCountryCode']
                        ) if 'PhoneCountryCode' in phone else ''
                    elif phone['PhoneType'] == 'DEFAULT':
                        default_phone_number = zero_to_empty(
                            phone['PhoneNumber']
                        ) if 'PhoneNumber' in phone else ''
                        default_phone_area_code = zero_to_empty(
                            phone['PhoneAreaCode']
                        ) if 'PhoneAreaCode' in phone else ''
                        default_phone_country_code = zero_to_empty(
                            phone['PhoneCountryCode']
                        ) if 'PhoneCountryCode' in phone else ''
                    elif phone['PhoneType'] == 'FAX':
                        fax_phone_number = zero_to_empty(
                            phone['PhoneNumber']
                        ) if 'PhoneNumber' in phone else ''
                        fax_phone_area_code = zero_to_empty(
                            phone['PhoneAreaCode']
                        ) if 'PhoneAreaCode' in phone else ''
                        fax_phone_country_code = zero_to_empty(
                            phone['PhoneCountryCode']
                        ) if 'PhoneCountryCode' in phone else ''
                    elif phone['PhoneType'] == 'MOBILE':
                        mobile_phone_number = zero_to_empty(
                            phone['PhoneNumber']
                        ) if 'PhoneNumber' in phone else ''
                        mobile_phone_area_code = zero_to_empty(
                            phone['PhoneAreaCode']
                        ) if 'PhoneAreaCode' in phone else ''
                        mobile_phone_country_code = zero_to_empty(
                            phone['PhoneCountryCode']
                        ) if 'PhoneCountryCode' in phone else ''
                    else:
                        # TODO : other type of phone (write note to log)
                        pass

            updated_date_utc = clean_date(
                contact['UpdatedDateUTC']
            ) if 'UpdatedDateUTC' in contact else ''
            is_supplier = contact.get('IsSupplier', '')
            is_customer = contact.get('IsCustomer', '')
            url = xero_url + contact_id

            columns = [
                contact_id, account_number, contact_status, name,
                first_name, last_name, email_address, skype_user_name,
                segment_1, segment_2, segment_3, bank_account_details,
                tax_number, street_city, street_region, street_postalcode,
                street_country, pobox_addressline1, pobox_addressline2,
                pobox_address_line3, pobox_address_line4, pobox_city,
                pobox_region, pobox_postal_code, pobox_country,
                pobox_attention_to, default_phone_number,
                default_phone_area_code, default_phone_country_code,
                mobile_phone_number, mobile_phone_area_code,
                mobile_phone_country_code, fax_phone_number,
                fax_phone_area_code, fax_phone_country_code,
                ddi_phone_number, ddi_phone_area_code,
                ddi_phone_country_code, updated_date_utc, is_supplier,
                is_customer, processing_notes, url
            ]
            prep_columns = list(
                map(lambda col: "\"" + str(col) + "\"", columns))
            csv_file.write(",".join(prep_columns) + "\n")

    self.log.write("INFO [{}] CSV file created {} ({:,} records)".format(
        data_type, csv_file_name, i))

    # per-column Excel formats: only UpdatedDateUTC (column 39) is 'long';
    # computed instead of 43 hand-counted literals
    formats = [''] * 38 + ['long'] + [''] * 4
    ark = Archiver(self.log)
    ark.archive(data_type, json_file_name)
    ark.archive(data_type, csv_file_name)
    ark.copy(data_type, csv_file_name, 'master')
    ark.copy(data_type,
             csv_file_name,
             'current',
             excelize=True,
             xlsx_formats=formats)