# NOTE(review): this chunk begins mid-function -- the enclosing def (a page
# fetcher that returns (status, url, lprime, return_source), apparently named
# get_page_for_class per the __main__ block below) starts above this view.
# Indentation of the reconstructed tail is a best guess -- confirm against the
# full file. Python 2 syntax throughout (print statement, csv.reader.next()).
        if error.message == 'timed out':
            # A network timeout is an expected outcome: report it as a status
            # string rather than propagating the exception.
            n = 'TIMEDOUT'
        else:
            if pass_errors:
                # Caller opted to swallow unexpected errors and keep going.
                n = 'ERROR'
            else:
                # Dump the traceback for debugging, then re-raise.
                import traceback; print traceback.format_exc()
                raise error
    #return n.decode(d['encoding']).encode('utf-8'),
    return n,url,lprime, return_source

if __name__=='__main__':
    from multiprocessing import Pool
    # Work out of the 'fb' directory and collect the numbered search-result
    # files (fbsearch_resultsNN...); sorted so they are processed in order.
    os.chdir('fb')
    webpages = [d for d in os.listdir('.') if re.match(r'fbsearch_results\d\d+',d)]
    webpages.sort()
    # ersatz / ERSATZPG_CONFIG / StringIO / csv / time / os / re are imported
    # above this view. `cursor` is unused in the visible code -- presumably
    # used further down; TODO confirm.
    connection = ersatz.db_connect(ERSATZPG_CONFIG)
    cursor = connection.cursor()
    for w in webpages:
        print w
        with open(w) as f:
            t = time.time()
            # 20 worker processes fetch pages concurrently; results come back
            # through the callback below, which runs in this (parent) process.
            pool = Pool(processes=20)
            buf = StringIO()
            csvw = csv.writer(buf)
            csvr = csv.reader(f)
            def callb(nurl):
                # Unpack one worker result and buffer it as a CSV row.
                n,url,lprime,return_source=nurl
                csvw.writerow([lprime[0],lprime[2],url,n,repr(return_source)])
            # Skip the header row, then fan the remaining rows out to the pool.
            csvr.next()
            for l in csvr:
                pool.apply_async(get_page_for_class,[l[1],l], callback=callb)
                # NOTE(review): no pool.close()/join() and no flush of `buf`
                # is visible here -- the chunk appears truncated; confirm the
                # rest of the loop body exists in the full file.
# NOTE(review): mid-script chunk -- `state_ins`, `all_states`, and the imports
# (sys, time, ersatz, table_tools, process_schema, univ_settings,
# bipbuild_conf) are all defined outside this view. Python 2 (print statement).
if state_ins == 'all':
    # 'all' is shorthand for the full state list defined above this view.
    state_ins = all_states

# Expansion lists for the -all / -all_no_clean convenience flags: each expands
# into the full build pipeline, spliced in just before the final argv element
# (which is presumably the state/target argument -- TODO confirm).
do_all = ['-clean','-partition','-clean_import','-build','-distinct','-unions','-rekey','-export']
do_all_no_clean =['-clean_import','-build','-distinct','-unions','-rekey','-export']
if '-all' in sys.argv:
    sys.argv = sys.argv[:-1] + do_all + [sys.argv[-1]]
if '-all_no_clean' in sys.argv:
    sys.argv = sys.argv[:-1] + do_all_no_clean + [sys.argv[-1]]

# Parse the reduced schema file into table/enum/foreign-key/sequence specs.
tables, enums, fks, seqs = process_schema.rip_schema('schema/bip_model_reduced.sql')
#table_tools.define_long_tables(tables, fks)

if '-clean' in sys.argv:
    # Drop and recreate every enum, model table, and import staging table,
    # committing once at the end; timed for operator feedback.
    t =time.time()
    connection = ersatz.db_connect(univ_settings.ERSATZPG_CONFIG)
    # table_tools.delete_pksq(connection)
    # table_tools.create_pksq(connection)
    table_tools.delete_enums(connection)
    table_tools.create_enums(connection)
    table_tools.delete_tables(tables, connection)
    table_tools.create_tables(tables, connection)
    table_tools.delete_import_tables(bipbuild_conf.ACTUAL_TABLES, bipbuild_conf.UNIONS, connection)
    table_tools.create_import_tables(bipbuild_conf.ACTUAL_TABLES, tables, connection)
    connection.commit()
    connection.close()
    t = time.time() - t
    print "Elapsed: %s" % (t,)

if '-partition' in sys.argv:
    t =time.time()
    # NOTE(review): the -clean branch connects via univ_settings.ERSATZPG_CONFIG
    # but this branch uses bipbuild_conf.ERSATZPG_CONFIG -- confirm whether the
    # two configs are meant to differ or one of them is a typo.
    connection = ersatz.db_connect(bipbuild_conf.ERSATZPG_CONFIG)
    # (chunk ends here; the remainder of the -partition branch is outside
    # this view)
import os,sys
from collections import OrderedDict

# Run from the repository root so the package imports below resolve.
os.chdir('..')
sys.path.append('.')

from ersatzpg import ersatz,ersatz_threaded, create_partitions as cp
from examples import test_part

conf = test_part.ERSATZPG_CONFIG
connection = ersatz.db_connect(conf)

# Recreate the two target tables from scratch before importing.
ddl_statements = [
    "drop table if exists part_test cascade",
    "drop table if exists part_test2 cascade",
    "create table part_test(column1 int, column2 int, column3 int, column4 int, column5 int);",
    "create table part_test2(column6 int, column7 int, column8 int, column9 int, column10 int);",
]
for statement in ddl_statements:
    connection.cursor().execute(statement)

# Partition part_test from the spec in the example config; part_test2 gets an
# explicit two-column discrete spec (digits 0-9 on each column, in order).
cp.create_discrete_partitions(['part_test'], conf['tables']['part_test']['partitions'], connection.cursor())
digit_values = tuple(range(10))
part_test2_spec = OrderedDict([
    ('column6', digit_values),
    ('column7', digit_values),
])
cp.create_discrete_partitions(['part_test2'], part_test2_spec, connection.cursor())

connection.commit()
connection.close()

# Kick off the (threaded) import.
#ersatz.new_process_copies(test_part)
ersatz_threaded.new_process_copies(test_part)
import os, sys
from collections import OrderedDict

# Run from the repository root so the package imports below resolve.
os.chdir('..')
sys.path.append('.')

from ersatzpg import ersatz, ersatz_threaded, create_partitions as cp
from examples import test_part

conf = test_part.ERSATZPG_CONFIG
connection = ersatz.db_connect(conf)

#create our tables into which we want to import
connection.cursor().execute("drop table if exists part_test cascade")
connection.cursor().execute("drop table if exists part_test2 cascade")
connection.cursor().execute(
    "create table part_test(column1 int, column2 int, column3 int, column4 int, column5 int);"
)
connection.cursor().execute(
    "create table part_test2(column6 int, column7 int, column8 int, column9 int, column10 int);"
)

#create table partitions into which we want to import
# part_test uses the partition spec from the example config; part_test2 gets
# an explicit two-column discrete spec (digits 0-9 per column, order matters).
cp.create_discrete_partitions(['part_test'],
                              conf['tables']['part_test']['partitions'],
                              connection.cursor())
od = OrderedDict([
    ('column6', (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)),
    ('column7', (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)),
])
cp.create_discrete_partitions(['part_test2'], od, connection.cursor())

connection.commit()
connection.close()

#run the import
# NOTE(review): the sibling script ends with
# ersatz_threaded.new_process_copies(test_part) after this comment; here the
# visible chunk stops -- confirm whether the file continues or the call is
# genuinely missing.