class DbStorage(Storage):
    """Storage backend that writes rows through a ``DBGateway``."""

    def __init__(self, db=''):
        """Create the gateway for the database named ``db`` + '.sqlite'."""
        self.db = DBGateway(db + '.sqlite')

    def open(self):
        """Create TABLE with every column in COLUMNS declared as TEXT.

        Returns True when ``create_table`` completes, False when any
        exception is raised while building or issuing the request.
        """
        try:
            column_defs = [' '.join((name, 'TEXT')) for name in COLUMNS]
            self.db.create_table(TABLE, cols=column_defs, primary=PRIMARY)
        except Exception:
            return False
        return True

    def store(self, data_dict):
        """Insert ``data_dict`` as one row of TABLE.

        The dict is first passed through ``self.align_kwargs`` (defined
        outside this class body). If the insert reports failure, a
        reduced row holding only the ``pdf_md5`` value and the gateway's
        error text is inserted instead.
        """
        row = self.align_kwargs(data_dict)
        if self.db.insert(TABLE, cols=COLUMNS, vals=row):
            return
        # Full insert failed: record just the key and the reason.
        md5 = data_dict.get('pdf_md5')
        message = 'DB_ERROR: %s' % self.db.get_error()
        self.db.insert(TABLE, cols=['pdf_md5', 'errors'], vals=(md5, message))

    def close(self):
        """Disconnect the underlying gateway."""
        self.db.disconnect()

    def contains(self, key, val):
        """Return the gateway's count of TABLE rows where ``key`` is ``val``."""
        return self.db.count(TABLE, key, val)
class DbStorage(Storage):
    """Storage backend that writes rows through a ``DBGateway``."""

    def __init__(self, db=''):
        """Create the gateway for the database named ``db`` + '.sqlite'."""
        self.db = DBGateway(db + '.sqlite')

    def open(self):
        """Create TABLE with every column in COLUMNS declared as TEXT.

        Returns True when ``create_table`` completes, False when any
        exception is raised while building or issuing the request.
        """
        try:
            column_defs = [' '.join((name, 'TEXT')) for name in COLUMNS]
            self.db.create_table(TABLE, cols=column_defs, primary=PRIMARY)
        except Exception:
            return False
        return True

    def store(self, data_dict):
        """Insert ``data_dict`` as one row of TABLE.

        The dict is first passed through ``self.align_kwargs`` (defined
        outside this class body). If the insert reports failure, a
        reduced row holding only the ``pdf_md5`` value and the gateway's
        error text is inserted instead.
        """
        row = self.align_kwargs(data_dict)
        if self.db.insert(TABLE, cols=COLUMNS, vals=row):
            return
        # Full insert failed: record just the key and the reason.
        md5 = data_dict.get('pdf_md5')
        message = 'DB_ERROR: %s' % self.db.get_error()
        self.db.insert(TABLE, cols=['pdf_md5', 'errors'], vals=(md5, message))

    def close(self):
        """Disconnect the underlying gateway."""
        self.db.disconnect()

    def contains(self, key, val):
        """Return the gateway's count of TABLE rows where ``key`` is ``val``."""
        return self.db.count(TABLE, key, val)
def __init__(self, db=''):
    """Create the gateway for the database named ``db`` + '.sqlite'."""
    sqlite_name = db + '.sqlite'
    self.db = DBGateway(sqlite_name)
print 'Mapping: %s' % func.name if func.setup: print '\tsetup:\t%s\n\t\t%s' % func.setup if not self.setup(func.setup): continue while func.proceed: if not self.db.query(func.query_cmd()): sys.stderr.write("query: %s\n" % self.db.get_error()) else: func.run(self.db.db_curr.fetchone()) if not self.db.query(func.update, func.subs): sys.stderr.write("update: %s\n" % self.db.get_error()) if __name__ == '__main__': try: dbpath = sys.argv[1] if not os.path.exists(dbpath): raise IndexError MIN = sys.argv[2] MAX = sys.argv[3] except IndexError as e: print 'Invalid args: %s' % e sys.exit(0) else: db = DBGateway(os.path.basename(dbpath), os.path.dirname(dbpath)) functions = [ JSHasher(MIN, MAX), ] mapper = Mapper(db, functions) mapper.start() db.disconnect()
# Output database is named after the job type and the process id.
db_name = 'clarified-%s-%d.sqlite' % (JSFLASH, PID)
logmsg(log, 'Creating: %s\n\n' % db_name)

# Build the select/update templates for the chosen job type. The first
# '%s' of each select and the last '%s' of each update are deliberately
# left as placeholders to be substituted later.
if JSFLASH == 'js':
    cmd = (
        "select rowid, pdf_md5, tree, obf_js from parsed_pdfs "
        "where obf_js is not '' and de_js is '' "
        "and (rowid > %s and rowid <= %s) order by rowid limit 1"
    ) % ('%s', MAX)
    update = (
        "update parsed_pdfs set de_js='%s' where rowid is %s"
    ) % (db_name, '%s')
elif JSFLASH == 'flash':
    cmd = (
        "select rowid, pdf_md5, tree, swf from parsed_pdfs "
        "where swf is not '' "
        "and (rowid > %s and rowid <= %s) order by rowid limit 1"
    ) % ('%s', MAX)
    update = (
        "update parsed_pdfs set actionscript='%s' where rowid is %s"
    ) % (db_name, '%s')
logmsg(log, "%s\n%s\n" % (cmd, update))

# Beautifier options.
jsopts = default_options()
jsopts.preserve_new_lines = False
jsopts.break_chained_methods = True

# Input and output gateways live in the same directory.
DB = DBGateway(DBin_NAME, '/media/sf_voodo_db/')
DBout = DBGateway(db_name, '/media/sf_voodo_db/')

created = DBout.query(
    'create table if not exists clarified ('
    'pdf_md5 TEXT, js TEXT, de_js TEXT, de_js_sdhash TEXT, '
    'swf TEXT, abc TEXT, actionscript TEXT, actionscript_sdhash TEXT, '
    'primary key(pdf_md5))'
)
if not created:
    # Without the output table nothing can be stored: log and abort.
    err = DBout.get_error()
    logmsg(log, "%s\n" % err)
    sys.exit(1)


def tree_from_xml(xml):
    """Parse the xml string with ``ET.fromstring``.

    Returns the parsed tree, or None if parsing raises any exception.
    """
    try:
        tree = ET.fromstring(xml)
    except Exception:
        tree = None
    return tree
cmd = "select rowid, pdf_md5, tree, obf_js from parsed_pdfs where obf_js is not '' and de_js is '' and (rowid > %s and rowid <= %s) order by rowid limit 1" % ( '%s', MAX) update = "update parsed_pdfs set de_js='%s' where rowid is %s" % (db_name, '%s') elif JSFLASH == 'flash': cmd = "select rowid, pdf_md5, tree, swf from parsed_pdfs where swf is not '' and (rowid > %s and rowid <= %s) order by rowid limit 1" % ( '%s', MAX) update = "update parsed_pdfs set actionscript='%s' where rowid is %s" % ( db_name, '%s') logmsg(log, "%s\n%s\n" % (cmd, update)) jsopts = default_options() jsopts.preserve_new_lines = False jsopts.break_chained_methods = True DB = DBGateway(DBin_NAME, '/media/sf_voodo_db/') DBout = DBGateway(db_name, '/media/sf_voodo_db/') if not DBout.query( 'create table if not exists clarified (pdf_md5 TEXT, js TEXT, de_js TEXT, de_js_sdhash TEXT, swf TEXT, abc TEXT, actionscript TEXT, actionscript_sdhash TEXT, primary key(pdf_md5))' ): err = DBout.get_error() logmsg(log, "%s\n" % err) sys.exit(1) ''' Create an lxml tree from the xml string ''' def tree_from_xml(xml): try: return ET.fromstring(xml)
print '\tsetup:\t%s\n\t\t%s' % func.setup if not self.setup(func.setup): continue while func.proceed: if not self.db.query(func.query_cmd()): sys.stderr.write("query: %s\n" % self.db.get_error()) else: func.run(self.db.db_curr.fetchone()) if not self.db.query(func.update, func.subs): sys.stderr.write("update: %s\n" % self.db.get_error()) if __name__ == '__main__': try: dbpath = sys.argv[1] if not os.path.exists(dbpath): raise IndexError MIN = sys.argv[2] MAX = sys.argv[3] except IndexError as e: print 'Invalid args: %s' % e sys.exit(0) else: db = DBGateway(os.path.basename(dbpath), os.path.dirname(dbpath)) functions = [ JSHasher(MIN, MAX), ] mapper = Mapper(db, functions) mapper.start() db.disconnect()