Exemple #1
0
def doprocess(argv):
    """Collect documents, extract their metadata and write the HTML report.

    ``argv`` is parsed with ``getopt``; every option takes a value:

    * ``-d``  search word, also handed to the HTML export (default "local")
    * ``-t``  file type, or comma-separated list of file types, to look for
    * ``-l``  limit passed to the Google search
    * ``-h``  "no" (default) searches online; any other value analyses the
              files already present in the working directory
    * ``-n``  maximum number of files downloaded per file type (default 50)
    * ``-o``  working directory; created if it does not exist
    * ``-f``  output file name handed to the HTML export

    ``-o`` and ``-f`` (plus ``-t`` and ``-l`` for an online search) have no
    default, so leaving one out raises ``UnboundLocalError`` at first use.

    Parsed results are appended to the module-level ``all`` list; ``start``
    and ``password`` are likewise read from module scope.  Nothing is
    returned.
    """
    filelimit = 50
    word = "local"
    localanalysis = "no"
    failedfiles = []
    emails = []

    # NOTE(review): usage() is presumably expected to terminate the program;
    # if it returns, `opts` is unbound after a GetoptError -- confirm.
    if len(sys.argv) < 3:
        usage()
    try:
        opts, args = getopt.getopt(argv, "l:d:f:h:n:t:o:")
    except getopt.GetoptError:
        usage()

    for opt, arg in opts:
        if opt == '-d':
            word = arg
        elif opt == '-t':
            filetypes = arg.split(",")
            if "," not in arg:
                # The single-type case echoes the list, as it always did.
                print(filetypes)
        elif opt == '-l':
            limit = int(arg)
        elif opt == '-h':
            localanalysis = arg
        elif opt == '-n':
            filelimit = int(arg)
        elif opt == '-o':
            workdir = arg
        elif opt == '-f':
            outhtml = arg

    if not os.path.exists(workdir):
        os.mkdir(workdir)

    if localanalysis == "no":
        print("\n[-] Starting online search...")
        for filetype in filetypes:
            print("\n[-] Searching for " + filetype +
                  " files, with a limit of " + str(limit))
            search = googlesearch.search_google(word, limit, start, filetype)
            search.process_files()
            files = search.get_files()
            print("Results: " + str(len(files)) + " files found")
            print("Starting to download " + str(filelimit) + " of them:")
            print("----------------------------------------\n")
            for counter, x in enumerate(files, 1):
                if counter > filelimit:
                    break
                print("[" + str(counter) + "/" + str(filelimit) + "] " + x)
                getfile = downloader.downloader(x, workdir)
                getfile.down()
                filename = getfile.name()
                if filename == "":
                    # Nothing to parse for this URL.
                    continue
                path = workdir + "/" + filename
                if filetype == "pdf":
                    test = metadataPDF.metapdf(path, password)
                elif filetype in ("doc", "ppt", "xls"):
                    test = metadataMSOffice.metaMs2k(path)
                    if os.name == "posix":
                        # NOTE(review): built but never run on the online
                        # path, unlike local analysis -- confirm whether
                        # runExtract() should be called here as well.
                        testex = metadataExtractor.metaExtractor(path)
                elif filetype in ("docx", "pptx", "xlsx"):
                    test = metadataMSOfficeXML.metaInfoMS(path)
                else:
                    # No parser for this type: without this guard `test` would
                    # be unbound, or still be the previous file's parser.
                    failedfiles.append(x + ":unsupported file type " + filetype)
                    continue
                res = test.getData()
                if res != "ok":
                    failedfiles.append(x + ":" + str(res))
                    print("\t [x] Error in the parsing process")
                    continue
                raw = test.getRaw()
                users = test.getUsers()
                paths = test.getPaths()
                soft = test.getSoftware()
                email = []
                if filetype == "pdf" or filetype == "docx":
                    res = test.getTexts()
                    if res == "ok":
                        email = test.getEmails()
                        emails.extend(email)
                    else:
                        failedfiles.append(x + ":" + str(res))
                all.append([x, users, paths, soft, raw, email])
    else:
        print("[-] Starting local analysis in directory " + workdir)
        dirList = os.listdir(workdir)
        print(dirList)
        for filename in dirList:
            if filename == "":
                continue
            filetype = str(filename.split(".")[-1])
            path = workdir + "/" + filename
            testex = None
            if filetype == "pdf":
                test = metadataPDF.metapdf(path, password)
            elif filetype in ("doc", "ppt", "xls"):
                print("doc")
                test = metadataMSOffice.metaMs2k(path)
                if os.name == "posix":
                    testex = metadataExtractor.metaExtractor(path)
            elif filetype in ("docx", "pptx", "xlsx"):
                test = metadataMSOfficeXML.metaInfoMS(path)
            else:
                # The directory may hold files of any kind; skip those with no
                # parser instead of reusing the previous file's parser.
                continue
            res = test.getData()
            if res != "ok":
                # The failure branch belongs to the getData() result, not to
                # the doc/xls/ppt check it was previously attached to.
                failedfiles.append(filename + ":" + str(res))
                print("[x] Error in the parsing process")
                continue
            raw = test.getRaw()
            users = test.getUsers()
            paths = test.getPaths()
            soft = test.getSoftware()
            if testex is not None:
                # Only set for doc/xls/ppt on posix (see above).
                testex.runExtract()
                testex.getData()
                paths.extend(testex.getPaths())
            # Reset per file so the result pack never carries the addresses
            # of a previously analysed document.
            email = []
            if filetype == "docx" or filetype == "pdf":
                res = test.getTexts()
                if res == "ok":
                    email = test.getEmails()
                    emails.extend(email)
                else:
                    failedfiles.append(filename + ":" + str(res))
            else:
                print("pass")
            all.append([filename, users, paths, soft, raw, email])

    print("processing")
    proc = processor.processor(all)
    userlist = proc.sort_users()
    softlist = proc.sort_software()
    pathlist = proc.sort_paths()
    try:
        html = htmlExport.htmlExport(userlist, softlist, pathlist, all,
                                     outhtml, workdir, failedfiles, word,
                                     emails)
        html.writehtml()
    except Exception as e:
        # Best effort: report the problem instead of aborting the run.
        print(e)
        print("Error creating the file")
Exemple #2
0
    def post(self, target):
        """Search online for documents about ``target`` and parse their metadata.

        The options are fixed in ``opts`` below rather than read from the
        command line: pdf and doc files, a search limit of 200, at most 50
        downloads per file type, stored in the ``downloadfiles`` directory
        (created if missing).  Each successfully parsed file is appended to
        the module-level ``all`` list as
        ``[url, users, paths, software, raw, emails]``.  A ``socket.error``
        raised while handling one file type is printed and the next type is
        tried.  ``self`` is not used and nothing is returned.
        """
        filelimit = 50
        word = "local"
        localanalysis = "no"
        failedfiles = []
        emails = []

        # Command-line parsing (sys.argv / getopt / usage) is disabled in this
        # variant; the equivalent option pairs are hard-coded instead.
        opts = [('-d', target), ('-t', 'pdf,doc'), ('-l', '200'), ('-n', '50'),
                ('-o', 'downloadfiles'), ('-f', 'resultsss.html')]

        for flag, value in opts:
            if flag == '-d':
                word = target
            elif flag == '-t':
                if "," in value:
                    filetypes = value.split(",")
                else:
                    filetypes = [value]
                    print(filetypes)
            elif flag == '-l':
                limit = int(value)
            elif flag == '-h':
                localanalysis = value
            elif flag == '-n':
                filelimit = int(value)
            elif flag == '-o':
                workdir = value
            elif flag == '-f':
                outhtml = value

        if not os.path.exists(workdir):
            os.mkdir(workdir)

        if localanalysis == "no":
            print("\n[-] Starting online search...")
            for filetype in filetypes:
                print("\n[-] Searching for " + filetype +
                      " files, with a limit of " + str(limit))
                try:
                    search = googlesearch.search_google(
                        word, limit, start, filetype)
                    search.process_files()
                    files = search.get_files()
                    print("Results: " + str(len(files)) + " files found")
                    print("Starting to download " + str(filelimit) +
                          " of them:")
                    print("----------------------------------------\n")

                    for position, url in enumerate(files, 1):
                        if position > filelimit:
                            # Download quota for this file type is used up.
                            break
                        print("[" + str(position) + "/" + str(filelimit) +
                              "] " + url)
                        getfile = downloader.downloader(url, workdir)
                        getfile.down()
                        filename = getfile.name()
                        if filename == "":
                            continue
                        local_path = workdir + "/" + filename
                        if filetype == "pdf":
                            test = metadataPDF.metapdf(local_path, password)
                        elif filetype in ("doc", "ppt", "xls"):
                            test = metadataMSOffice.metaMs2k(local_path)
                            if os.name == "posix":
                                testex = metadataExtractor.metaExtractor(
                                    local_path)
                        elif filetype in ("docx", "pptx", "xlsx"):
                            test = metadataMSOfficeXML.metaInfoMS(local_path)
                        res = test.getData()
                        if res != "ok":
                            failedfiles.append(url + ":" + str(res))
                            print("\t [x] Error in the parsing process")
                            continue
                        raw = test.getRaw()
                        users = test.getUsers()
                        paths = test.getPaths()
                        soft = test.getSoftware()
                        email = []
                        if filetype in ("pdf", "docx"):
                            res = test.getTexts()
                            if res == "ok":
                                email = test.getEmails()
                                emails.extend(email)
                            else:
                                failedfiles.append(url + ":" + str(res))
                        all.append([url, users, paths, soft, raw, email])
                except socket.error as ex:
                    print(ex)
Exemple #3
0
def doprocess(argv):
    """Collect documents, extract their metadata and write the HTML report.

    ``argv`` is parsed with ``getopt``; every option takes a value:

    * ``-d``  search word, also handed to the HTML export (default "local")
    * ``-t``  file type, or comma-separated list of file types, to look for
    * ``-l``  limit passed to the Google search
    * ``-h``  "no" (default) searches online; any other value analyses the
              files already present in the working directory
    * ``-n``  maximum number of files downloaded per file type (default 50)
    * ``-o``  working directory; created if it does not exist
    * ``-f``  output file name handed to the HTML export

    ``-o`` and ``-f`` (plus ``-t`` and ``-l`` for an online search) have no
    default, so leaving one out raises ``UnboundLocalError`` at first use.

    Parsed results are appended to the module-level ``all`` list; ``start``
    and ``password`` are likewise read from module scope.  Nothing is
    returned.
    """
    filelimit = 50
    word = "local"
    localanalysis = "no"
    failedfiles = []
    emails = []

    # NOTE(review): usage() is presumably expected to terminate the program;
    # if it returns, `opts` is unbound after a GetoptError -- confirm.
    if len(sys.argv) < 3:
        usage()
    try:
        opts, args = getopt.getopt(argv, "l:d:f:h:n:t:o:")
    except getopt.GetoptError:
        usage()

    for opt, arg in opts:
        if opt == '-d':
            word = arg
        elif opt == '-t':
            filetypes = arg.split(",")
            if "," not in arg:
                # The single-type case echoes the list, as it always did.
                print(filetypes)
        elif opt == '-l':
            limit = int(arg)
        elif opt == '-h':
            localanalysis = arg
        elif opt == '-n':
            filelimit = int(arg)
        elif opt == '-o':
            workdir = arg
        elif opt == '-f':
            outhtml = arg

    if not os.path.exists(workdir):
        os.mkdir(workdir)

    if localanalysis == "no":
        print("\n[-] Starting online search...")
        for filetype in filetypes:
            print("\n[-] Searching for " + filetype +
                  " files, with a limit of " + str(limit))
            search = googlesearch.search_google(word, limit, start, filetype)
            search.process_files()
            files = search.get_files()
            print("Results: " + str(len(files)) + " files found")
            print("Starting to download " + str(filelimit) + " of them:")
            print("----------------------------------------\n")
            for counter, x in enumerate(files, 1):
                if counter > filelimit:
                    break
                print("[" + str(counter) + "/" + str(filelimit) + "] " + x)
                getfile = downloader.downloader(x, workdir)
                getfile.down()
                filename = getfile.name()
                if filename == "":
                    # Nothing to parse for this URL.
                    continue
                path = workdir + "/" + filename
                if filetype == "pdf":
                    test = metadataPDF.metapdf(path, password)
                elif filetype in ("doc", "ppt", "xls"):
                    test = metadataMSOffice.metaMs2k(path)
                    if os.name == "posix":
                        # NOTE(review): built but never run on the online
                        # path, unlike local analysis -- confirm whether
                        # runExtract() should be called here as well.
                        testex = metadataExtractor.metaExtractor(path)
                elif filetype in ("docx", "pptx", "xlsx"):
                    test = metadataMSOfficeXML.metaInfoMS(path)
                else:
                    # No parser for this type: without this guard `test` would
                    # be unbound, or still be the previous file's parser.
                    failedfiles.append(x + ":unsupported file type " + filetype)
                    continue
                res = test.getData()
                if res != "ok":
                    failedfiles.append(x + ":" + str(res))
                    print("\t [x] Error in the parsing process")
                    continue
                raw = test.getRaw()
                users = test.getUsers()
                paths = test.getPaths()
                soft = test.getSoftware()
                email = []
                if filetype == "pdf" or filetype == "docx":
                    res = test.getTexts()
                    if res == "ok":
                        email = test.getEmails()
                        emails.extend(email)
                    else:
                        failedfiles.append(x + ":" + str(res))
                all.append([x, users, paths, soft, raw, email])
    else:
        print("[-] Starting local analysis in directory " + workdir)
        dirList = os.listdir(workdir)
        print(dirList)
        for filename in dirList:
            if filename == "":
                continue
            filetype = str(filename.split(".")[-1])
            path = workdir + "/" + filename
            testex = None
            if filetype == "pdf":
                test = metadataPDF.metapdf(path, password)
            elif filetype in ("doc", "ppt", "xls"):
                print("doc")
                test = metadataMSOffice.metaMs2k(path)
                if os.name == "posix":
                    testex = metadataExtractor.metaExtractor(path)
            elif filetype in ("docx", "pptx", "xlsx"):
                test = metadataMSOfficeXML.metaInfoMS(path)
            else:
                # The directory may hold files of any kind; skip those with no
                # parser instead of reusing the previous file's parser.
                continue
            res = test.getData()
            if res != "ok":
                # The failure branch belongs to the getData() result, not to
                # the doc/xls/ppt check it was previously attached to.
                failedfiles.append(filename + ":" + str(res))
                print("[x] Error in the parsing process")
                continue
            raw = test.getRaw()
            users = test.getUsers()
            paths = test.getPaths()
            soft = test.getSoftware()
            if testex is not None:
                # Only set for doc/xls/ppt on posix (see above).
                testex.runExtract()
                testex.getData()
                paths.extend(testex.getPaths())
            # Reset per file so the result pack never carries the addresses
            # of a previously analysed document.
            email = []
            if filetype == "docx" or filetype == "pdf":
                res = test.getTexts()
                if res == "ok":
                    email = test.getEmails()
                    emails.extend(email)
                else:
                    failedfiles.append(filename + ":" + str(res))
            else:
                print("pass")
            all.append([filename, users, paths, soft, raw, email])

    print("processing")
    proc = processor.processor(all)
    userlist = proc.sort_users()
    softlist = proc.sort_software()
    pathlist = proc.sort_paths()
    try:
        html = htmlExport.htmlExport(userlist, softlist, pathlist, all,
                                     outhtml, workdir, failedfiles, word,
                                     emails)
        html.writehtml()
    except Exception as e:
        # Best effort: report the problem instead of aborting the run.
        print(e)
        print("Error creating the file")
Exemple #4
0
def doprocess(argv):
	"""Search online for documents and collect their metadata.

	``argv`` is parsed with ``getopt``; every option takes a value:
	``-d`` search word (default "local"), ``-t`` file type or comma-separated
	list of file types, ``-l`` limit passed to the Google search, ``-h``
	("no", the default, enables the online search), ``-n`` maximum number of
	files downloaded per file type (default 50), ``-o`` working directory
	(created if missing) and ``-f`` output file name.

	Each successfully parsed file is appended to the module-level ``all``
	list as ``[url, users, paths, software, raw]``; ``start`` and
	``password`` are read from module scope.  Errors raised while parsing a
	single file are printed and the loop moves on to the next file.
	"""
	# Defaults match the sibling variants of this function, so leaving out
	# -n or -d no longer ends in an UnboundLocalError.
	filelimit = 50
	word = "local"
	localanalysis = "no"
	# NOTE(review): usage() is presumably expected to terminate the program;
	# if it returns, `opts` is unbound after a GetoptError -- confirm.
	if len(sys.argv) < 3:
		usage()
	try:
		opts, args = getopt.getopt(argv, "l:d:f:h:n:t:o:")
	except getopt.GetoptError:
		usage()
	for opt, arg in opts:
		if opt == '-d':
			word = arg
		elif opt == '-t':
			filetypes = arg.split(",")
			if "," not in arg:
				# The single-type case echoes the list, as it always did.
				print(filetypes)
		elif opt == '-l':
			limit = int(arg)
		elif opt == '-h':
			localanalysis = arg
		elif opt == '-n':
			filelimit = int(arg)
		elif opt == '-o':
			dir = arg
		elif opt == '-f':
			outhtml = arg
	if not os.path.exists(dir):
		os.mkdir(dir)
	if localanalysis == "no":
		print("[-] Starting online search...")
		for filetype in filetypes:
			print("\n[-] Searching for " + filetype + " files, with a limit of " + str(limit))
			search = googlesearch.search_google(word, limit, start, filetype)
			search.process_files()
			files = search.get_files()
			print("Results: " + str(len(files)) + " files found")
			print("Starting to download " + str(filelimit) + " of them..")
			print("----------------------------------------------------\n")
			counter = 0
			for x in files:
				# counter is zero-based, so the test must be strict: "<="
				# fetched filelimit + 1 files and printed e.g. "[51/50]".
				if counter < filelimit:
					print("[" + str(counter + 1) + "/" + str(filelimit) + "] " + x)
					getfile = downloader.downloader(x, dir)
					getfile.down()
					filename = getfile.name()
					try:
						if filename != "":
							if filetype == "pdf":
								test = metadataPDF.metapdf(dir + "/" + filename, password)
							elif filetype in ("doc", "ppt", "xls"):
								test = metadataMSOffice.metaMs2k(dir + "/" + filename)
								if os.name == "posix":
									testex = metadataExtractor.metaExtractor(dir + "/" + filename)
							elif filetype in ("docx", "pptx", "xlsx"):
								test = metadataMSOfficeXML.metaInfoMS(dir + "/" + filename)
							else:
								# Without this, `test` would silently be the
								# previous file's parser; the handler below
								# reports the problem instead.
								raise ValueError("unsupported file type: " + filetype)
							res = test.getData()
							if res == "ok":
								raw = test.getRaw()
								users = test.getUsers()
								paths = test.getPaths()
								soft = test.getSoftware()
								if filetype in ("doc", "xls", "ppt") and os.name == "posix":
									testex.runExtract()
									testex.getData()
									paths.extend(testex.getPaths())
								respack = [x, users, paths, soft, raw]
								all.append(respack)
							else:
								print("error")  # An error in the parsing process
						else:
							print("pass")
					except Exception as e:
						# Best effort: one bad file must not stop the run.
						print("ERROR: " + str(e))
				counter += 1