def InsertHosts(self, sharedict):
    for (host, share) in sharedict.iteritems():
        self.__cursor.execute("""
            INSERT INTO shares (scantype_id, network, protocol,
                                hostname, hostaddr, port, state)
            VALUES (%(st)s, %(net)s, %(proto)s, %(host)s,
                    inet %(addr)s, %(port)s, 'online')
            """, {'st': share.scantype, 'net': self.__network,
                  'proto': share.proto, 'host': share.host,
                  'addr': share.Addr(), 'port': share.port})
        # drop any stale on-disk dump left over for this share
        try:
            os.unlink(share_save_path(share.proto, share.host, share.port))
        except OSError:
            pass
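# InsertHosts() only relies on this slice of each share object's interface
# (names read off the call above; the real class is defined elsewhere in the
# codebase, so this stand-in is purely illustrative):
#
#   class DiscoveredShare(object):      # hypothetical
#       scantype = 1                    # -> shares.scantype_id
#       proto = 'smb'
#       host = 'fileserver.example.org'
#       port = 445
#       def Addr(self):
#           return '192.0.2.5'          # inserted with an explicit inet cast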
def update_share(share, cursor):
    print "Updating hash for %(proto)s://%(host)s:%(port)s" \
          % {'proto': share['protocol'],
             'port': str(share['port']),
             'host': share['hostname']}
    try:
        # re-hash the saved dump of this share's contents
        hash = hashlib.md5()
        for line in open(share_save_path(share['protocol'], share['hostname'],
                                         share['port']), "rt"):
            hash.update(line)
    except (IOError, OSError):
        # no readable dump: clear the stored hash so the next scan
        # falls back to non-patching mode
        cursor.execute("UPDATE trees SET hash='' WHERE tree_id=%(i)s",
                       {'i': share['tree_id']})
        return
    cursor.execute("UPDATE trees SET hash=%(h)s WHERE tree_id=%(i)s",
                   {'h': hash.hexdigest(), 'i': share['tree_id']})
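# Illustrative driver for update_share() -- a sketch, not part of the original
# module. It assumes connectdb() yields a psycopg2 connection whose cursors
# return dict-like rows (the share['...'] subscripts above imply as much) and
# that tree_id is reachable from shares via a join; the real schema may differ.
def rehash_all_shares():
    db = connectdb()
    cursor = db.cursor()
    cursor.execute("""
        SELECT share_id, tree_id, protocol, hostname, port
        FROM shares JOIN trees USING (share_id)
        """)
    for share in cursor.fetchall():
        update_share(share, cursor)
    db.commit()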
def scan_share(db, share_id, proto, host, port, tree_id, command):
    db.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_READ_COMMITTED)
    cursor = db.cursor()
    hoststr = sharestr(proto, host, port)
    try:
        # acquire a lock on this tree's row in the trees table
        cursor.execute("SELECT hash FROM ONLY trees WHERE tree_id=%(t)s FOR UPDATE NOWAIT",
                       {'t': tree_id})
        oldhash = cursor.fetchone()['hash']
    except Exception:
        # if another spider instance hasn't finished scanning, do nothing;
        # a side effect is backing off the next scan
        log("Scanning %s is running too long in another spider instance or database error.", (hoststr,))
        db.rollback()
        return
    savepath = share_save_path(proto, host, port)
    patchmode = oldhash is not None and os.path.isfile(savepath)
    try:
        address = socket.gethostbyname(host)
    except socket.error:
        log("Name resolution failed for %s.", (hoststr,))
        db.rollback()
        return
    log("Scanning %s (%s) ...", (hoststr, address))
    start = datetime.datetime.now()
    if patchmode:
        # hand the previous dump to the scanner so it can emit a patch
        data = run_scanner(command, address, proto, port, "-u " + quote_for_shell(savepath))
    else:
        data = run_scanner(command, address, proto, port)
    save = tempfile.TemporaryFile(bufsize=-1)
    line_count = 0
    line_count_patch = 0
    hash = hashlib.md5()
    for line in data.stdout:
        line_count += 1
        if line[0] in ('+', '-', '*'):
            line_count_patch += 1
        if line_count > max_lines_from_scanner:
            kill_process(data)
            data.stdout.close()
            data.wait()
            log("Scanning %s failed. Too many lines from scanner (elapsed time %s).",
                (hoststr, datetime.datetime.now() - start))
            db.rollback()
            return
        hash.update(line)
        save.write(line)
    if data.wait() != 0:
        cursor.execute("""
            UPDATE shares SET next_scan = now() + %(w)s WHERE share_id = %(s)s;
            """, {'s': share_id, 'w': wait_until_next_scan_failed})
        log("Scanning %s failed with return code %s (elapsed time %s).",
            (hoststr, data.returncode, datetime.datetime.now() - start))
        db.commit()
        return
    if patchmode and (line_count_patch > (line_count - line_count_patch) / patch_fallback):
        log("Patch is too long for %s (patch %s, non-patch %s). Falling back to non-patching mode.",
            (hoststr, line_count_patch, line_count - line_count_patch))
        patchmode = False
    scan_time = datetime.datetime.now() - start
    start = datetime.datetime.now()
    qcache = PsycoCache(cursor)
    paths_buffer = dict()
    save.seek(0)
    if patchmode and (oldhash is None or save.readline() != "* " + oldhash + "\n"):
        save.seek(0)
        patchmode = False
        log("MD5 digest from scanner doesn't match the one from the database. Falling back to non-patching mode.")
    if patchmode:
        cursor.execute("""
            CREATE TEMPORARY TABLE newfiles (LIKE files INCLUDING DEFAULTS) ON COMMIT DROP;
            CREATE INDEX newfiles_path ON newfiles(treepath_id);
            """)
        # apply "+"/"-"/"*" patch lines until the full listing (if any) starts
        for line in save:
            if line[0] not in ('+', '-', '*'):
                break
            scan_line_patch(cursor, tree_id, line.strip('\n'), qcache, paths_buffer)
        for (dirid, pinfo) in paths_buffer.iteritems():
            if pinfo.modify:
                qcache.append("SELECT push_path_files(%(t)s, %(d)s)",
                              {'t': tree_id, 'd': dirid})
    else:
        # rebuild the tree from scratch: every listing line becomes an addition
        cursor.execute("DELETE FROM paths WHERE tree_id = %(t)s", {'t': tree_id})
        for line in save:
            if line[0] in ('+', '-', '*'):
                continue
            scan_line_patch(cursor, tree_id, "+ " + line.strip('\n'), qcache, paths_buffer)
    qcache.allcommit()
    try:
        if os.path.isfile(savepath):
            shutil.move(savepath, savepath + ".old")
        save.seek(0)
        outfile = open(savepath, 'wb')
        shutil.copyfileobj(save, outfile)
        outfile.close()
    except Exception:
        log("Failed to save contents of %s to file %s.", (hoststr, savepath))
        traceback.print_exc()
    save.close()
    cursor.execute("""
        UPDATE shares SET last_scan = now(), next_scan = now() + %(w)s WHERE share_id = %(s)s;
        UPDATE trees SET hash = %(h)s WHERE tree_id = %(t)s;
        """, {'s': share_id, 't': tree_id, 'h': hash.hexdigest(), 'w': wait_until_next_scan})
    if qcache.totalsize >= 0:
        cursor.execute("""
            UPDATE shares SET size = %(sz)s WHERE share_id = %(s)s;
            """, {'s': share_id, 'sz': qcache.totalsize})
    db.commit()
    if patchmode:
        deleted = qcache.stat_pdelete + qcache.stat_fdelete
        added = qcache.stat_padd + qcache.stat_fadd
        modified = qcache.stat_fmodify
        log("Scanning %s succeeded. Database updated in patching mode: delete %s, add %s, modify %s (scan time %s, update time %s).",
            (hoststr, str(deleted), str(added), str(modified), scan_time, datetime.datetime.now() - start))
    else:
        log("Scanning %s succeeded. Database updated in non-patching mode (scan time %s, update time %s).",
            (hoststr, scan_time, datetime.datetime.now() - start))
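# Scanner output format, as inferred from the parsing above (the external
# scanner binary is authoritative, so treat this as a sketch):
#
#   * <md5>             first line in "-u" (patch) mode: digest of the old
#                       dump, checked against trees.hash before any patch
#                       line is applied
#   + <listing line>    entry added since the previous dump
#   - <listing line>    entry removed since the previous dump
#   <listing line>      plain full-listing line; in patching mode it ends the
#                       patch section, in non-patching mode each one is
#                       re-fed to scan_line_patch() with a "+ " prefix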
        break
    id, tree_id, proto, host, port, command = shares.fetchone()
    # optimistic claim: only the instance whose UPDATE hits exactly one row
    # owns this share; everyone else moves on to the next candidate
    shares.execute("""
        UPDATE shares SET next_scan = now() + %(w)s
        WHERE share_id = %(s)s AND (next_scan IS NULL OR next_scan < now())
        """, {'s': id, 'w': wait_until_next_scan})
    if shares.statusmessage != 'UPDATE 1':
        continue
    try:
        scan_share(db, id, proto, host, port, tree_id, command)
    except KeyboardInterrupt:
        log("Interrupted by user. Exiting.")
        db.rollback()
        sys.exit(0)
    except psycopg2.IntegrityError:
        now = int(time.time())
        log("SQL integrity violation while scanning %s. Renaming old contents with suffix %s; the next scan will run in non-patching mode.",
            (sharestr(proto, host, port), now))
        traceback.print_exc()
        db.rollback()
        # shelve both the current dump and any leftover ".old" dump so the
        # next scan starts from a clean slate
        savepath = share_save_path(proto, host, port)
        if os.path.isfile(savepath):
            shutil.move(savepath, savepath + "." + str(now))
        savepath += ".old"
        if os.path.isfile(savepath):
            shutil.move(savepath, savepath + "." + str(now))
    except Exception:
        log("Scanning %s failed with a crash. Something unexpected happened. Exception trace:",
            sharestr(proto, host, port))
        traceback.print_exc()
        db.rollback()
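# Context sketch (an assumption -- the head of this loop lies outside the
# fragment above): the claim-and-scan body presumably runs inside something
# like
#
#   while True:
#       shares.execute("SELECT ... FROM shares WHERE ... LIMIT 1")
#       if shares.rowcount == 0:
#           break
#       ...
#
# The conditional UPDATE then acts as an optimistic lock: when several spider
# instances race for the same row, only the one that sees 'UPDATE 1' proceeds
# to scan_share(); the others skip to the next candidate.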
if __name__ == "__main__":
    if len(set(('?', '-?', '/?', '-h', '/h', 'help', '--help')).intersection(sys.argv)) > 0:
        print sys.argv[0], '[diff [only]]'
        print "diff\tdump with diffs"
        print "only\tleave only dumps with non-empty diffs"
        sys.exit(0)
    try:
        db = connectdb()
    except Exception:
        print "Unable to connect to the database, exiting."
        sys.exit()
    shares = db.cursor()
    shares.execute("""
        SELECT share_id, protocol, hostname, port FROM shares
        """)
    create_dump_dir()
    skipmode = 'only' in sys.argv
    if 'diff' in sys.argv:
        for share in shares.fetchall():
            savepath = share_save_path(share['protocol'], share['hostname'], share['port'])
            if os.path.isfile(savepath):
                dump = dump_share(share, savepath)
                # in "only" mode discard dumps whose diff came out empty
                if skipmode and zero_diff(dump):
                    os.unlink(dump)
            elif not skipmode:
                dump_share(share)
    else:
        for share in shares.fetchall():
            dump_share(share)
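# Example invocations (the script name is illustrative):
#
#   python dump.py            dump the contents of every share
#   python dump.py diff       dump with diffs against the saved contents
#   python dump.py diff only  keep only dumps whose diff is non-empty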
            engine_name = type(lookuper).__name__
            try:
                lookuper()
                lookuper.commit()
            except UserWarning:
                pass
            except Exception:
                log("Exception in engine '%s' (network \"%s\")", (engine_name, net))
                traceback.print_exc()
    except UserWarning:
        pass
    except Exception:
        log("Exception at network \"%s\" lookup", net)
        traceback.print_exc()

cursor = db.cursor()
cursor.execute("""
    DELETE FROM shares
    WHERE ((size = 0) AND (last_lookup + interval %(tz)s < now()))
       OR ((size != 0) AND (last_lookup + interval %(tnz)s < now()))
    RETURNING protocol, hostname, port
    """, {'tz': wait_until_delete_empty_share, 'tnz': wait_until_delete_share})
for delrow in cursor.fetchall():
    # remove the saved contents of shares that were just deleted from the database
    try:
        os.unlink(share_save_path(delrow['protocol'], delrow['hostname'], delrow['port']))
    except OSError:
        pass

log("All network lookups finished (running time %s)", datetime.datetime.now() - start)
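# The scheduling knobs used throughout (wait_until_next_scan,
# wait_until_next_scan_failed, wait_until_delete_empty_share,
# wait_until_delete_share) are defined elsewhere in the codebase. Judging by
# how they are spliced into the SQL, they must adapt to PostgreSQL intervals;
# a plausible, purely illustrative shape:
#
#   wait_until_next_scan          = datetime.timedelta(hours=12)
#   wait_until_next_scan_failed   = datetime.timedelta(days=1)
#   wait_until_delete_empty_share = '3 days'    # used as: interval '3 days'
#   wait_until_delete_share       = '30 days'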