def doprocess(argv):
    """Collect document metadata and export it as an HTML report.

    ``argv`` is parsed with the getopt string ``"l:d:f:h:n:t:o:"``:

    * ``-d``  search word handed to ``googlesearch.search_google``
      (default ``"local"``).
    * ``-t``  file type, or a comma-separated list of file types.
    * ``-l``  limit handed to ``googlesearch.search_google``.
    * ``-h``  local-analysis switch; any value other than ``"no"`` analyses
      the files already present in the ``-o`` directory instead of
      searching and downloading.
    * ``-n``  maximum number of files handled per file type (default 50).
    * ``-o``  working directory; created when it does not exist.
    * ``-f``  output name handed to ``htmlExport.htmlExport``.

    ``-o`` and ``-f`` are always required; ``-t`` and ``-l`` are required
    for the online mode.  When something required is missing, ``usage()``
    is called and the function returns.

    One result row ``[source, users, paths, software, raw, emails]`` is
    appended per parsed file to the module-level name ``all`` (it must be
    a list defined elsewhere in this file).  ``start`` and ``password``
    are likewise read from module scope.
    """
    filelimit = 50
    word = "local"
    localanalysis = "no"
    failedfiles = []
    emails = []
    filetypes = None
    limit = None
    workdir = None
    outhtml = None

    legacy_office = ("doc", "ppt", "xls")
    office_xml = ("docx", "pptx", "xlsx")
    text_types = ("pdf", "docx")

    # Validate the arguments this function was actually given; for the
    # usual caller passing sys.argv[1:] this equals len(sys.argv) < 3.
    if len(argv) < 2:
        usage()
        return
    try:
        opts, args = getopt.getopt(argv, "l:d:f:h:n:t:o:")
    except getopt.GetoptError:
        usage()
        return

    for opt, arg in opts:
        if opt == '-d':
            word = arg
        elif opt == '-t':
            # "a,b".split(",") and "a".split(",") both give the wanted list.
            filetypes = arg.split(",")
            print(filetypes)
        elif opt == '-l':
            limit = int(arg)
        elif opt == '-h':
            localanalysis = arg
        elif opt == '-n':
            filelimit = int(arg)
        elif opt == '-o':
            workdir = arg
        elif opt == '-f':
            outhtml = arg

    online = localanalysis == "no"
    if workdir is None or outhtml is None:
        usage()
        return
    if online and (filetypes is None or limit is None):
        usage()
        return

    if not os.path.exists(workdir):
        os.mkdir(workdir)

    def open_parsers(path, filetype):
        """Return ``(parser, extractor)``; parser is None if unsupported."""
        if filetype == "pdf":
            return metadataPDF.metapdf(path, password), None
        if filetype in legacy_office:
            parser = metadataMSOffice.metaMs2k(path)
            extractor = None
            if os.name == "posix":
                extractor = metadataExtractor.metaExtractor(path)
            return parser, extractor
        if filetype in office_xml:
            return metadataMSOfficeXML.metaInfoMS(path), None
        return None, None

    def harvest_emails(parser, label):
        """Return the e-mails found in the document text.

        A failed text extraction is recorded in ``failedfiles`` and
        yields an empty list.
        """
        status = parser.getTexts()
        if status != "ok":
            failedfiles.append(label + ":" + str(status))
            return []
        found = parser.getEmails()
        emails.extend(found)
        return found

    if online:
        print("\n[-] Starting online search...")
        for filetype in filetypes:
            print("\n[-] Searching for " + filetype +
                  " files, with a limit of " + str(limit))
            search = googlesearch.search_google(word, limit, start, filetype)
            search.process_files()
            files = search.get_files()
            print("Results: " + str(len(files)) + " files found")
            print("Starting to download " + str(filelimit) + " of them:")
            print("----------------------------------------\n")
            for counter, url in enumerate(files, 1):
                if counter > filelimit:
                    break
                print("[" + str(counter) + "/" + str(filelimit) + "] " + url)
                getfile = downloader.downloader(url, workdir)
                getfile.down()
                filename = getfile.name()
                if filename == "":
                    continue
                # The online mode only uses the main parser; extractor
                # paths are merged by the local analysis below.
                parser, _ = open_parsers(workdir + "/" + filename, filetype)
                if parser is None:
                    # Without this guard an unknown type would reuse the
                    # parser of the previous file or raise NameError.
                    failedfiles.append(url + ":unsupported file type")
                    continue
                res = parser.getData()
                if res != "ok":
                    failedfiles.append(url + ":" + str(res))
                    print("\t [x] Error in the parsing process")
                    continue
                raw = parser.getRaw()
                users = parser.getUsers()
                paths = parser.getPaths()
                soft = parser.getSoftware()
                found = []
                if filetype in text_types:
                    found = harvest_emails(parser, url)
                all.append([url, users, paths, soft, raw, found])
    else:
        print("[-] Starting local analysis in directory " + workdir)
        dirList = os.listdir(workdir)
        print(dirList)
        for filename in dirList:
            if filename == "":
                continue
            filetype = str(filename.split(".")[-1])
            parser, extractor = open_parsers(workdir + "/" + filename,
                                             filetype)
            if parser is None:
                # Directories routinely hold unrelated files; skip them.
                continue
            res = parser.getData()
            if res != "ok":
                failedfiles.append(filename + ":" + str(res))
                print("[x] Error in the parsing process")
                continue
            raw = parser.getRaw()
            users = parser.getUsers()
            paths = parser.getPaths()
            soft = parser.getSoftware()
            if extractor is not None:
                extractor.runExtract()
                extractor.getData()
                paths.extend(extractor.getPaths())
            # E-mails are gathered before the row is built so the row
            # never refers to an unset or stale e-mail list.
            found = []
            if filetype in text_types:
                found = harvest_emails(parser, filename)
            all.append([filename, users, paths, soft, raw, found])

    print("processing")
    proc = processor.processor(all)
    userlist = proc.sort_users()
    softlist = proc.sort_software()
    pathlist = proc.sort_paths()
    try:
        html = htmlExport.htmlExport(userlist, softlist, pathlist, all,
                                     outhtml, workdir, failedfiles, word,
                                     emails)
        html.writehtml()
    except Exception as e:
        # Best effort: report the problem instead of aborting the run.
        print(e)
        print("Error creating the file")
def post(self, target):
    """Search for, download and parse pdf/doc files for ``target``.

    The settings are fixed: file types ``pdf`` and ``doc``, a search
    limit of 200, at most 50 files handled per type, and the download
    directory ``downloadfiles`` (created when missing).  For every file
    that parses, a row ``[url, users, paths, software, raw, emails]`` is
    appended to the module-level name ``all``.  ``start`` and
    ``password`` are read from module scope.  A ``socket.error`` raised
    while handling one file type is printed and the next type is tried.
    Nothing is returned.
    """
    per_type_cap = int('50')
    search_word = target
    search_limit = int('200')
    out_dir = 'downloadfiles'
    failed = []
    collected_emails = []

    wanted_types = 'pdf,doc'.split(",")
    print(wanted_types)

    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    print("\n[-] Starting online search...")
    for filetype in wanted_types:
        print("\n[-] Searching for " + filetype +
              " files, with a limit of " + str(search_limit))
        try:
            search = googlesearch.search_google(search_word, search_limit,
                                                start, filetype)
            search.process_files()
            files = search.get_files()
            print("Results: " + str(len(files)) + " files found")
            print("Starting to download " + str(per_type_cap) + " of them:")
            print("----------------------------------------\n")
            for position, url in enumerate(files, 1):
                if position > per_type_cap:
                    break
                print("[" + str(position) + "/" + str(per_type_cap) + "] "
                      + url)
                getfile = downloader.downloader(url, out_dir)
                getfile.down()
                filename = getfile.name()
                if filename == "":
                    continue
                path = out_dir + "/" + filename
                if filetype == "pdf":
                    test = metadataPDF.metapdf(path, password)
                elif filetype in ("doc", "ppt", "xls"):
                    test = metadataMSOffice.metaMs2k(path)
                    if os.name == "posix":
                        # Constructed as before; its result is not used here.
                        testex = metadataExtractor.metaExtractor(path)
                elif filetype in ("docx", "pptx", "xlsx"):
                    test = metadataMSOfficeXML.metaInfoMS(path)
                res = test.getData()
                if res != "ok":
                    failed.append(url + ":" + str(res))
                    print("\t [x] Error in the parsing process")
                    continue
                raw = test.getRaw()
                users = test.getUsers()
                paths = test.getPaths()
                soft = test.getSoftware()
                email = []
                if filetype in ("pdf", "docx"):
                    res = test.getTexts()
                    if res == "ok":
                        email = test.getEmails()
                        for address in email:
                            collected_emails.append(address)
                    else:
                        email = []
                        failed.append(url + ":" + str(res))
                all.append([url, users, paths, soft, raw, email])
        except socket.error as ex:
            print(ex)
def doprocess(argv):
    """Collect document metadata and export it as an HTML report.

    ``argv`` is parsed with the getopt string ``"l:d:f:h:n:t:o:"``:

    * ``-d``  search word handed to ``googlesearch.search_google``
      (default ``"local"``).
    * ``-t``  file type, or a comma-separated list of file types.
    * ``-l``  limit handed to ``googlesearch.search_google``.
    * ``-h``  local-analysis switch; any value other than ``"no"`` analyses
      the files already present in the ``-o`` directory instead of
      searching and downloading.
    * ``-n``  maximum number of files handled per file type (default 50).
    * ``-o``  working directory; created when it does not exist.
    * ``-f``  output name handed to ``htmlExport.htmlExport``.

    ``-o`` and ``-f`` are always required; ``-t`` and ``-l`` are required
    for the online mode.  When something required is missing, ``usage()``
    is called and the function returns.

    One result row ``[source, users, paths, software, raw, emails]`` is
    appended per parsed file to the module-level name ``all`` (it must be
    a list defined elsewhere in this file).  ``start`` and ``password``
    are likewise read from module scope.
    """
    filelimit = 50
    word = "local"
    localanalysis = "no"
    failedfiles = []
    emails = []
    filetypes = None
    limit = None
    workdir = None
    outhtml = None

    legacy_office = ("doc", "ppt", "xls")
    office_xml = ("docx", "pptx", "xlsx")
    text_types = ("pdf", "docx")

    # Validate the arguments this function was actually given; for the
    # usual caller passing sys.argv[1:] this equals len(sys.argv) < 3.
    if len(argv) < 2:
        usage()
        return
    try:
        opts, args = getopt.getopt(argv, "l:d:f:h:n:t:o:")
    except getopt.GetoptError:
        usage()
        return

    for opt, arg in opts:
        if opt == '-d':
            word = arg
        elif opt == '-t':
            # "a,b".split(",") and "a".split(",") both give the wanted list.
            filetypes = arg.split(",")
            print(filetypes)
        elif opt == '-l':
            limit = int(arg)
        elif opt == '-h':
            localanalysis = arg
        elif opt == '-n':
            filelimit = int(arg)
        elif opt == '-o':
            workdir = arg
        elif opt == '-f':
            outhtml = arg

    online = localanalysis == "no"
    if workdir is None or outhtml is None:
        usage()
        return
    if online and (filetypes is None or limit is None):
        usage()
        return

    if not os.path.exists(workdir):
        os.mkdir(workdir)

    def open_parsers(path, filetype):
        """Return ``(parser, extractor)``; parser is None if unsupported."""
        if filetype == "pdf":
            return metadataPDF.metapdf(path, password), None
        if filetype in legacy_office:
            parser = metadataMSOffice.metaMs2k(path)
            extractor = None
            if os.name == "posix":
                extractor = metadataExtractor.metaExtractor(path)
            return parser, extractor
        if filetype in office_xml:
            return metadataMSOfficeXML.metaInfoMS(path), None
        return None, None

    def harvest_emails(parser, label):
        """Return the e-mails found in the document text.

        A failed text extraction is recorded in ``failedfiles`` and
        yields an empty list.
        """
        status = parser.getTexts()
        if status != "ok":
            failedfiles.append(label + ":" + str(status))
            return []
        found = parser.getEmails()
        emails.extend(found)
        return found

    if online:
        print("\n[-] Starting online search...")
        for filetype in filetypes:
            print("\n[-] Searching for " + filetype +
                  " files, with a limit of " + str(limit))
            search = googlesearch.search_google(word, limit, start, filetype)
            search.process_files()
            files = search.get_files()
            print("Results: " + str(len(files)) + " files found")
            print("Starting to download " + str(filelimit) + " of them:")
            print("----------------------------------------\n")
            for counter, url in enumerate(files, 1):
                if counter > filelimit:
                    break
                print("[" + str(counter) + "/" + str(filelimit) + "] " + url)
                getfile = downloader.downloader(url, workdir)
                getfile.down()
                filename = getfile.name()
                if filename == "":
                    continue
                # The online mode only uses the main parser; extractor
                # paths are merged by the local analysis below.
                parser, _ = open_parsers(workdir + "/" + filename, filetype)
                if parser is None:
                    # Without this guard an unknown type would reuse the
                    # parser of the previous file or raise NameError.
                    failedfiles.append(url + ":unsupported file type")
                    continue
                res = parser.getData()
                if res != "ok":
                    failedfiles.append(url + ":" + str(res))
                    print("\t [x] Error in the parsing process")
                    continue
                raw = parser.getRaw()
                users = parser.getUsers()
                paths = parser.getPaths()
                soft = parser.getSoftware()
                found = []
                if filetype in text_types:
                    found = harvest_emails(parser, url)
                all.append([url, users, paths, soft, raw, found])
    else:
        print("[-] Starting local analysis in directory " + workdir)
        dirList = os.listdir(workdir)
        print(dirList)
        for filename in dirList:
            if filename == "":
                continue
            filetype = str(filename.split(".")[-1])
            parser, extractor = open_parsers(workdir + "/" + filename,
                                             filetype)
            if parser is None:
                # Directories routinely hold unrelated files; skip them.
                continue
            res = parser.getData()
            if res != "ok":
                failedfiles.append(filename + ":" + str(res))
                print("[x] Error in the parsing process")
                continue
            raw = parser.getRaw()
            users = parser.getUsers()
            paths = parser.getPaths()
            soft = parser.getSoftware()
            if extractor is not None:
                extractor.runExtract()
                extractor.getData()
                paths.extend(extractor.getPaths())
            # E-mails are gathered before the row is built so the row
            # never refers to an unset or stale e-mail list.
            found = []
            if filetype in text_types:
                found = harvest_emails(parser, filename)
            all.append([filename, users, paths, soft, raw, found])

    print("processing")
    proc = processor.processor(all)
    userlist = proc.sort_users()
    softlist = proc.sort_software()
    pathlist = proc.sort_paths()
    try:
        html = htmlExport.htmlExport(userlist, softlist, pathlist, all,
                                     outhtml, workdir, failedfiles, word,
                                     emails)
        html.writehtml()
    except Exception as e:
        # Best effort: report the problem instead of aborting the run.
        print(e)
        print("Error creating the file")
def doprocess(argv):
    """Search for documents online, download them and parse their metadata.

    ``argv`` is parsed with the getopt string ``"l:d:f:h:n:t:o:"``:

    * ``-d``  search word handed to ``googlesearch.search_google``
      (default ``"local"``).
    * ``-t``  file type, or a comma-separated list of file types.
    * ``-l``  limit handed to ``googlesearch.search_google``.
    * ``-h``  local-analysis switch; with any value other than ``"no"``
      the online search is skipped.
    * ``-n``  maximum number of files handled per file type (default 50).
    * ``-o``  working directory; created when it does not exist.
    * ``-f``  output file name (parsed but not used by this function).

    ``-o`` is always required; ``-t`` and ``-l`` are required for the
    online search.  When something required is missing, ``usage()`` is
    called and the function returns.

    One row ``[url, users, paths, software, raw]`` is appended per parsed
    file to the module-level name ``all`` (it must be a list defined
    elsewhere in this file).  ``start`` and ``password`` are likewise
    read from module scope.
    """
    filelimit = 50
    word = "local"
    localanalysis = "no"
    filetypes = None
    limit = None
    workdir = None
    outhtml = None

    legacy_office = ("doc", "ppt", "xls")
    office_xml = ("docx", "pptx", "xlsx")

    # Validate the arguments this function was actually given; for the
    # usual caller passing sys.argv[1:] this equals len(sys.argv) < 3.
    if len(argv) < 2:
        usage()
        return
    try:
        opts, args = getopt.getopt(argv, "l:d:f:h:n:t:o:")
    except getopt.GetoptError:
        usage()
        return

    for opt, arg in opts:
        if opt == '-d':
            word = arg
        elif opt == '-t':
            # "a,b".split(",") and "a".split(",") both give the wanted list.
            filetypes = arg.split(",")
            print(filetypes)
        elif opt == '-l':
            limit = int(arg)
        elif opt == '-h':
            localanalysis = arg
        elif opt == '-n':
            filelimit = int(arg)
        elif opt == '-o':
            workdir = arg
        elif opt == '-f':
            outhtml = arg

    if workdir is None:
        usage()
        return
    if not os.path.exists(workdir):
        os.mkdir(workdir)

    if localanalysis != "no":
        return
    if filetypes is None or limit is None:
        usage()
        return

    print("[-] Starting online search...")
    for filetype in filetypes:
        print("\n[-] Searching for " + filetype +
              " files, with a limit of " + str(limit))
        search = googlesearch.search_google(word, limit, start, filetype)
        search.process_files()
        files = search.get_files()
        print("Results: " + str(len(files)) + " files found")
        print("Starting to download " + str(filelimit) + " of them..")
        print("----------------------------------------------------\n")
        for counter, url in enumerate(files):
            # Stop after exactly `filelimit` files so the work done matches
            # the "[i/filelimit]" progress display.
            if counter >= filelimit:
                break
            print("[" + str(counter + 1) + "/" + str(filelimit) + "] " + url)
            getfile = downloader.downloader(url, workdir)
            getfile.down()
            filename = getfile.name()
            try:
                if filename == "":
                    print("pass")
                    continue
                path = workdir + "/" + filename
                extractor = None
                if filetype == "pdf":
                    parser = metadataPDF.metapdf(path, password)
                elif filetype in legacy_office:
                    parser = metadataMSOffice.metaMs2k(path)
                    if os.name == "posix":
                        extractor = metadataExtractor.metaExtractor(path)
                elif filetype in office_xml:
                    parser = metadataMSOfficeXML.metaInfoMS(path)
                else:
                    # Never fall through with the parser of a previous file.
                    print("ERROR: unsupported file type " + filetype)
                    continue
                res = parser.getData()
                if res != "ok":
                    print("error")
                    continue
                raw = parser.getRaw()
                users = parser.getUsers()
                paths = parser.getPaths()
                soft = parser.getSoftware()
                if extractor is not None:
                    extractor.runExtract()
                    extractor.getData()
                    paths.extend(extractor.getPaths())
                all.append([url, users, paths, soft, raw])
            except Exception as e:
                # Best effort: one broken document must not stop the run.
                print("ERROR: " + str(e))