def ilines(
        file_name=None,
        include_description=include_description):
    """
    Yield one text line per product row and index the rows by category.

    Rows come from the ``Product`` table when ``file_name`` is None
    (restricted by ``Product.url % "https://www.bukalapak.com/p/%"``,
    presumably a LIKE-style pattern match -- confirm against the ORM),
    otherwise from the CSV file ``file_name``, each record being wrapped
    in ``CsvRow``.

    Module globals written as a side effect (once iteration starts):

    * ``prod_ids`` -- reset, then filled with ``row.id`` in iteration order.
    * ``category_rows`` -- reset, then maps each category to the list of
      row positions that belong to it.  The category is the part of the
      URL between its fourth ``/``-separated piece and its last piece.
    * ``categories`` -- sorted category names, rebuilt once when the
      iteration finishes or the generator is closed.  It is no longer
      refreshed after every single row.

    :param file_name: CSV file to read, or None to query the database.
    :param include_description: when true and rows come from the
        database, a non-empty description is appended to the title,
        separated by one space.
    :returns: generator of strings (title, optionally with description).
    """
    global categories, prod_ids, category_rows
    prod_ids = []
    category_rows = defaultdict(list)

    if file_name is None:
        rows = (Product
                .select(
                    Product.id,
                    Product.title,
                    Product.description,
                    Product.url)
                .where(Product.url % "https://www.bukalapak.com/p/%"))
    else:
        rows = (CsvRow(*fields)
                for fields in utils.unicode_csv_reader(file_name))

    try:
        for i, row in enumerate(rows):
            prod_ids.append(row.id)
            category = '/'.join(row.url.split('/')[4:-1])
            category_rows[category].append(i)
            res = row.title
            # Descriptions are only appended for database rows.
            if file_name is None and include_description and row.description:
                res += ' ' + row.description
            yield res
    finally:
        # Sort once instead of after every row.  Doing it in ``finally``
        # keeps ``categories`` in step with ``category_rows`` even when the
        # source is empty or the consumer stops early and closes the
        # generator.
        categories = sorted(category_rows)
Example #2
0
def test(tbl='logs'):
    '''
    Load one of the test CSV files and push its rows into ``test.db``.

    For every data row, ``db.is_duplicate(tbl, row)`` is asked for a
    statement; when it hands one back, that statement is run through
    ``db.query()``.  For the ``'logs'`` table the ``parent`` and
    ``activity`` fields are first rewritten with ``replace_txt()``.

    Usage::

        db, d = test('location')
        for row in d.dict:
            sql = db.is_duplicate('location', row)
            if sql: db.query(sql)

    :param tbl: ``'logs'`` (reads ``test_eternity.csv``) or
        ``'location'`` (reads ``test_gps.csv``); both files are looked up
        in the current working directory.
    :returns: ``(db, data)`` -- the ``SimpleDB`` handle and the
        ``tablib.Dataset`` built from the CSV file.
    :raises Exception: if ``tbl`` is neither ``'logs'`` nor ``'location'``.
    '''
    # Validate the argument before importing anything or touching the
    # database file, so a bad call fails fast with no side effects.
    if tbl == 'logs':
        f_name = 'test_eternity.csv'
        headers = ('day', 'start_time', 'stop_time', 'duration',
                   'parent', 'activity', 'note', 'tags')
    elif tbl == 'location':
        f_name = 'test_gps.csv'
        headers = ('latitude', 'longitude', 'elevation', 'timestamp')
    else:
        raise Exception(
            "test(tbl) must be 'logs' or 'location', got %r" % (tbl,))

    from utils import unicode_csv_reader, replace_txt, remove_duplicates
    import tablib

    db = SimpleDB(os.path.join(os.getcwd(), 'test.db'))

    # get data
    with open(os.path.join(os.getcwd(), f_name), 'r') as f:
        d = remove_duplicates([tuple(row) for row in unicode_csv_reader(f)])
    data = tablib.Dataset(*d, headers=headers)

    # TODO - adjust replace_txt() function to accept orderedDicts
    #        since the order of replacement is important.

    # replacement dicts
    parent_dict = {
        u'Media>':                        u'MEDIA',
        u'MISC - Real Life>':             u'REAL_LIFE',
        u'Basic Routine>Meals & Snacks>': u'BASIC',
        u'Basic Routine>':                u'BASIC',
        u'Salubrious Living>':            u'HEALTH',
    }
    activity_dict = {
        u'RL - MISC - Home':    u'HOME',
        u'RL - MISC - Outside': u'OUTSIDE',
        u'へんたい':                u'HENTAI',
        u'アニメ':                 u'ANIME',
        u'Grocery Shopping':    u'GROCERY-SHOPPING',
        u'Restaurant':          u'RESTAURANT',
        u'Shower & Bathroom':   u'SHOWER-BATHROOM',
    }

    # test for duplicates in data (skip over the first row to avoid headers)
    for row in data.dict[1:]:
        if tbl == 'logs':
            row['parent'] = replace_txt(row['parent'], parent_dict)
            row['activity'] = replace_txt(row['activity'], activity_dict)

        sql = db.is_duplicate(tbl, row)
        if sql:
            db.query(sql)

    # The usage example in the docstring unpacks both objects, so hand
    # them back to the caller.
    return db, data
Example #3
0
    for i, filename in enumerate(choices):
        print("\t{0}. {1}".format(i, filename))
else:
    raise Exception, "\tNO FILES FOUND"

print('\ntype the number of the file you want to import')
# Parse the whole reply rather than only its first character, so that
# indexes of 10 and above are read correctly; int() already tolerates
# surrounding whitespace.
FILENUM     = int(raw_input('>>> '))
chosen_file = choices[FILENUM]

#-----------------------------------------------------------------------------
# DUMP DATA TO DATABASE
#-----------------------------------------------------------------------------

# get data: every CSV row becomes a tuple, then the list goes through
# remove_duplicates()
with open(chosen_file, 'r') as f:
    d = remove_duplicates([tuple(row) for row in unicode_csv_reader(f)])

# validate and insert the data
if table_name == 'logs':
    insertEternity(d)
elif table_name == 'location':
    insertLocation(d)
else:
    # Call-style raise: the "raise E, msg" statement form is discouraged
    # by PEP 8 and is a syntax error on Python 3.
    raise Exception('neither logs nor location')


#-----------------------------------------------------------------------------
# VALIDATION AND FORMATTING
#-----------------------------------------------------------------------------