def ilines(file_name=None, include_description=include_description):
    """Iterate over product text lines.

    Yields the title of each product — optionally followed by its
    description when reading from the database — and, as a side effect,
    rebuilds the module globals ``prod_ids``, ``category_rows`` and
    (after exhaustion) ``categories``.

    :param file_name: when ``None``, read products from the DB via the
        ``Product`` model; otherwise read rows from this CSV file.
    :param include_description: append ``row.description`` to the yielded
        line (DB mode only).
    """
    global categories, prod_ids, category_rows

    prod_ids = []
    category_rows = defaultdict(list)

    # Choose the record source once, up front.
    if file_name is None:
        source = (Product
                  .select(Product.id, Product.title,
                          Product.description, Product.url)
                  .where(Product.url % "https://www.bukalapak.com/p/%"))
    else:
        source = (CsvRow(*l) for l in utils.unicode_csv_reader(file_name))

    for idx, record in enumerate(source):
        prod_ids.append(record.id)
        # Category = the URL path segments between the domain and the slug.
        cat = '/'.join(record.url.split('/')[4:-1])
        category_rows[cat].append(idx)

        line = record.title
        if file_name is None and include_description and record.description:
            line += ' ' + record.description
        yield line

    # Only populated once the generator has been fully consumed.
    categories = sorted(category_rows.keys())
def test(tbl='logs'):
    '''Smoke-test duplicate detection against a bundled sample CSV.

    Loads ``test_eternity.csv`` (tbl='logs') or ``test_gps.csv``
    (tbl='location') from the current directory into a ``tablib.Dataset``
    and runs every row through ``SimpleDB.is_duplicate``.

    Manual usage sketch (NOTE(review): the function currently returns
    nothing, so the ``db, d = test(...)`` form below needs confirming)::

        >>> db, d = test('location')

        for row in d.dict:
            sql = db.is_duplicate('location', row)
            if sql:
                db.query(sql)
    '''
    from utils import unicode_csv_reader, replace_txt, remove_duplicates
    import tablib

    db = SimpleDB(os.path.join(os.getcwd(), 'test.db'))

    # Pick the fixture file and its column layout.
    if tbl == 'logs':
        f_name = 'test_eternity.csv'
        headers = ('day', 'start_time', 'stop_time', 'duration',
                   'parent', 'activity', 'note', 'tags')
    elif tbl == 'location':
        f_name = 'test_gps.csv'
        headers = ('latitude', 'longitude', 'elevation', 'timestamp')
    else:
        # Call form: the old `raise Exception, msg` statement is
        # Python-2-only syntax; Exception("...") works on 2 and 3.
        raise Exception("test(args) must = eternity or gps")

    # get data
    with open(os.path.join(os.getcwd(), f_name), 'r') as f:
        d = remove_duplicates([tuple(row) for row in unicode_csv_reader(f)])
    data = tablib.Dataset(*d, headers=headers)

    # TODO - adjust replace_txt() function to accept OrderedDicts
    # since the order of replacement is important.

    # replacement dicts
    parent_dict = {
        u'Media>': u'MEDIA',
        u'MISC - Real Life>': u'REAL_LIFE',
        u'Basic Routine>Meals & Snacks>': u'BASIC',
        u'Basic Routine>': u'BASIC',
        u'Salubrious Living>': u'HEALTH',
    }
    activity_dict = {
        u'RL - MISC - Home': u'HOME',
        u'RL - MISC - Outside': u'OUTSIDE',
        u'へんたい': u'HENTAI',
        u'アニメ': u'ANIME',
        u'Grocery Shopping': u'GROCERY-SHOPPING',
        u'Restaurant': u'RESTAURANT',
        u'Shower & Bathroom': u'SHOWER-BATHROOM'
    }

    # test for duplicates in data (skip over the first row to avoid headers)
    # NOTE(review): tablib's .dict excludes the header row already, so the
    # [1:] slice may actually drop the first *data* row -- confirm intent.
    for row in data.dict[1:]:
        if tbl == 'logs':
            row['parent'] = replace_txt(row['parent'], parent_dict)
            row['activity'] = replace_txt(row['activity'], activity_dict)
        sql = db.is_duplicate(tbl, row)
        if sql:
            db.query(sql)
# List the candidate files; bail out early when there is nothing to import.
# BUGFIX: the original used `for ... else: raise`, but a for-loop's `else`
# clause runs whenever the loop finishes WITHOUT `break` -- i.e. it raised
# "NO FILES FOUND" unconditionally.  An explicit empty-list guard is what
# was intended.
if not choices:
    raise Exception("\tNO FILES FOUND")
for i, filename in enumerate(choices):
    print("\t{0}. {1}".format(i, filename))

print('\ntype the number of the file you want to import')
# BUGFIX: parse the whole reply rather than raw_input(...)[0]; the old
# code truncated "12" to "1", making any index >= 10 unreachable.
FILENUM = int(raw_input('>>> '))
chosen_file = choices[FILENUM]

#-----------------------------------------------------------------------------
# DUMP DATA TO DATABASE
#-----------------------------------------------------------------------------

# get data (de-duplicated tuples of CSV fields)
with open(chosen_file, 'r') as f:
    d = remove_duplicates([tuple(row) for row in unicode_csv_reader(f)])

# validate and insert the data
if table_name == 'logs':
    insertEternity(d)
elif table_name == 'location':
    insertLocation(d)
else:
    # Call form instead of the Python-2-only `raise Exception, msg` statement.
    raise Exception('neither logs nor location')

#-----------------------------------------------------------------------------
# VALIDATION AND FORMATTING
#-----------------------------------------------------------------------------