Example #1
        if error.message == 'timed out':
            n = 'TIMEDOUT'
        elif pass_errors:
            n = 'ERROR'
        else:
            import traceback
            print traceback.format_exc()
            raise error
    #return n.decode(d['encoding']).encode('utf-8'),
    return n, url, lprime, return_source

if __name__ == '__main__':
    from multiprocessing import Pool
    os.chdir('fb')
    # Collect the saved fbsearch result files and process them in order.
    webpages = [d for d in os.listdir('.') if re.match(r'fbsearch_results\d\d+', d)]
    webpages.sort()
    connection = ersatz.db_connect(ERSATZPG_CONFIG)
    cursor = connection.cursor()
    for w in webpages:
        print w
        with open(w) as f:
            t = time.time()
            pool = Pool(processes=20)
            buf = StringIO()
            csvw = csv.writer(buf)
            csvr = csv.reader(f)
            # The callback runs in the parent process and appends each
            # worker's result to the in-memory CSV buffer.
            def callb(nurl):
                n, url, lprime, return_source = nurl
                csvw.writerow([lprime[0], lprime[2], url, n, repr(return_source)])
            csvr.next()  # advance past the header row
            for l in csvr:
                pool.apply_async(get_page_for_class, [l[1], l], callback=callb)
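The fragment above is cut off before the pool is closed and the CSV buffer is written out, but the pattern it relies on is multiprocessing.Pool.apply_async with a callback that runs in the parent process. Below is a minimal, self-contained sketch of that pattern; fetch() is a hypothetical stand-in for get_page_for_class, and the ersatzpg pieces are left out entirely.

# Sketch of the apply_async + callback pattern used above.
# fetch() is a placeholder, not the real get_page_for_class.
from multiprocessing import Pool

def fetch(url, row):
    # pretend to fetch the page and report a status tuple
    return ('OK', url, row, '<html>...</html>')

if __name__ == '__main__':
    results = []
    pool = Pool(processes=4)
    rows = [['id1', 'http://example.com/a', 'x'],
            ['id2', 'http://example.com/b', 'y']]
    for row in rows:
        # callback=results.append runs in the parent as each worker finishes
        pool.apply_async(fetch, [row[1], row], callback=results.append)
    pool.close()
    pool.join()  # wait for all workers before using the results
    print("collected %d results" % len(results))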
Example #2
if state_ins == 'all':
    state_ins = all_states
# Expand the -all / -all_no_clean shortcuts into the individual build steps.
do_all = ['-clean','-partition','-clean_import','-build','-distinct','-unions','-rekey','-export']
do_all_no_clean = ['-clean_import','-build','-distinct','-unions','-rekey','-export']
if '-all' in sys.argv:
    sys.argv = sys.argv[:-1] + do_all + [sys.argv[-1]]

if '-all_no_clean' in sys.argv:
    sys.argv = sys.argv[:-1] + do_all_no_clean + [sys.argv[-1]]

tables, enums, fks, seqs = process_schema.rip_schema('schema/bip_model_reduced.sql')
#table_tools.define_long_tables(tables, fks)

if '-clean' in sys.argv:
    # Drop and recreate the enums, model tables, and import tables from scratch.
    t = time.time()
    connection = ersatz.db_connect(univ_settings.ERSATZPG_CONFIG)
#    table_tools.delete_pksq(connection)
#    table_tools.create_pksq(connection)
    table_tools.delete_enums(connection)
    table_tools.create_enums(connection)
    table_tools.delete_tables(tables, connection)
    table_tools.create_tables(tables, connection)
    table_tools.delete_import_tables(bipbuild_conf.ACTUAL_TABLES, bipbuild_conf.UNIONS, connection)
    table_tools.create_import_tables(bipbuild_conf.ACTUAL_TABLES, tables, connection)
    connection.commit()
    connection.close()
    t = time.time() - t
    print "Elapsed: %s" % (t,)
if '-partition' in sys.argv:
    t = time.time()
    connection = ersatz.db_connect(bipbuild_conf.ERSATZPG_CONFIG)
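The -all and -all_no_clean handling above rewrites sys.argv in place: the concrete step flags are spliced in just before the last positional argument, and the meta-flag itself stays in the list, where the later per-step checks simply ignore it. A small illustration of the same splice; the script name and trailing argument here are made up, not taken from the code above.

# Hypothetical invocation: script name and trailing argument are invented.
do_all = ['-clean', '-partition', '-build', '-export']  # abbreviated step list
argv = ['bipbuild.py', '-all', 'ny']
if '-all' in argv:
    # splice the concrete steps in before the final positional argument
    argv = argv[:-1] + do_all + [argv[-1]]
print(argv)
# ['bipbuild.py', '-all', '-clean', '-partition', '-build', '-export', 'ny']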
Example #3
import os, sys
from collections import OrderedDict
os.chdir('..')
sys.path.append('.')
from ersatzpg import ersatz, ersatz_threaded, create_partitions as cp
from examples import test_part

conf = test_part.ERSATZPG_CONFIG
connection = ersatz.db_connect(conf)

# create our tables into which we want to import
connection.cursor().execute("drop table if exists part_test cascade")
connection.cursor().execute("drop table if exists part_test2 cascade")
connection.cursor().execute(
    "create table part_test(column1 int, column2 int, column3 int, column4 int, column5 int);"
)
connection.cursor().execute(
    "create table part_test2(column6 int, column7 int, column8 int, column9 int, column10 int);"
)

# create table partitions into which we want to import
cp.create_discrete_partitions(['part_test'],
                              conf['tables']['part_test']['partitions'],
                              connection.cursor())
od = OrderedDict([
    ('column6', (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)),
    ('column7', (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)),
])
cp.create_discrete_partitions(['part_test2'], od, connection.cursor())
connection.commit()
connection.close()

# run the import
#ersatz.new_process_copies(test_part)
ersatz_threaded.new_process_copies(test_part)
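To check that the threaded import actually filled the partitions, one can reopen a connection and count rows. This is only a sketch, and it assumes the partitions are set up so that a query on the parent tables created above also sees the rows stored in their child partitions.

# Sketch: count imported rows (assumes parent-table queries include partition rows).
connection = ersatz.db_connect(conf)
cursor = connection.cursor()
for table in ('part_test', 'part_test2'):
    cursor.execute("select count(*) from %s" % table)
    print("%s: %s rows" % (table, cursor.fetchone()[0]))
connection.close()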