def CreateDataForLDA(Location, fileName=None):
    """Read every *.txt file under Location and return a list of cleaned documents.

    Each document string is the file's tokens with English stop words removed
    and each remaining token lemmatized then stemmed, joined by single spaces
    (with a leading space, matching the historical output format).

    Args:
        Location: directory holding the .txt corpus; created if missing.
        fileName: unused (kept for backward compatibility with callers).

    Returns:
        list of per-file preprocessed strings suitable for LDA input.
    """
    TotalFilesToProcess = 0
    DataHolder = []
    tokenizer = WordPunctTokenizer()
    english_stops = set(stopwords.words('english'))
    stemmer = PorterStemmer()
    lemmatizer = WordNetLemmatizer()
    if os.path.exists(Location):
        print("The Folder you specified already exists, all the files those you want to use should be here")
    else:
        os.mkdir(Location)
        print("Folder got created")
    # BUG FIX: the old pattern Location+"*.txt" matched nothing when Location
    # lacked a trailing separator; os.path.join is correct either way.
    for eachFileInFolder in glob.iglob(os.path.join(Location, "*.txt")):
        TotalFilesToProcess = TotalFilesToProcess + 1
        print("Now Processing the File : " + eachFileInFolder)
        FileContents = Path(eachFileInFolder).read_text()
        # Strip non-ASCII characters, then decode back so the tokenizer always
        # receives text (encode() alone yields bytes on Python 3).
        ascii_text = FileContents.encode('ascii', errors='ignore').decode('ascii')
        tokens = tokenizer.tokenize(ascii_text)
        StopWordsRemovedText = [word for word in tokens if word not in english_stops]
        # O(n) join instead of quadratic `words = words + " " + ...`;
        # "".join of " "+stem chunks preserves the original leading space.
        words = "".join(
            " " + stemmer.stem(lemmatizer.lemmatize(word))
            for word in StopWordsRemovedText
        )
        DataHolder.append(words)
    print("Hello, basic stemming and stop word removal is done")
    return DataHolder
def upload(request):
    """Django view: accept a sample upload, submit it to Cuckoo, show results.

    POST: validates the form, removes any same-named previous sample, saves
    the new Document, runs the submit script, then records the newest binary
    SHA and the predicted analysis directory before rendering 'submited.html'.
    GET (or invalid form): renders 'upload.html' with an unbound form.
    """
    # Handle file upload
    if request.method == 'POST':
        form = DocumentForm(request.POST, request.FILES)
        if form.is_valid():
            media_dir = "/home/fernando/Desktop/myproject/media/samples/"
            uploaded_name = request.FILES['docfile'].name
            my_file = Paths(media_dir + uploaded_name)
            if my_file.is_file():
                # remove duplicates so the fresh upload replaces the old copy
                os.remove(media_dir + uploaded_name)
            newdoc = Document(docfile=request.FILES['docfile'])
            newdoc.save()
            pathtofile = media_dir + uploaded_name
            # SECURITY FIX: the filename is user-controlled; the old
            # shell=True string concatenation allowed shell command
            # injection (e.g. a name containing "; rm -rf ~"). Passing an
            # argv list with shell=False makes the name inert. This also
            # removes the .encode() call whose bytes result broke the
            # str concatenation under Python 3.
            subprocess.call([
                '/home/fernando/Desktop/myproject/myproject/myapp/submitsample.sh',
                pathtofile,
            ])
            # crude wait for Cuckoo to register the submitted sample
            time.sleep(5)
            binaries_dir = '/home/fernando/.cuckoo/storage/binaries'
            latest_file_sha = max(
                (os.path.join(binaries_dir, d) for d in os.listdir(binaries_dir)),
                key=os.path.getmtime)
            latest_file_sha = os.path.basename(latest_file_sha)
            latest_dir_analisys = os.path.basename(
                os.readlink('/home/fernando/.cuckoo/storage/analyses/latest'))
            filename = uploaded_name
            # the new analysis lands in the directory numbered one past "latest"
            dirpath = ('/home/fernando/.cuckoo/storage/analyses/'
                       + str(int(latest_dir_analisys) + 1) + '/')
            newinfo = Path(namefile=filename,
                           idsha256=latest_file_sha,
                           pathto=dirpath)  # Ok
            newinfo.save()
            print(newinfo.idsha256)
            # analisysinfo = Path.objects.create(namefile=filename,idsha256=latest_file_sha,pathto=dirpath)
            return render(
                request, 'submited.html', {
                    'latest_file_sha': latest_file_sha,
                    'dirpath': dirpath,
                    'filename': filename
                })
            # Redirect to the document list after POST
            # return HttpResponseRedirect(reverse('upload'))
    else:
        form = DocumentForm()  # An empty, unbound form
    # Render list page with the documents and the form
    return render(request, 'upload.html', {'form': form})
def main():
    """Poll fld_src for prefixed files, publish each to Pub/Sub, and file the
    results: successful publishes move to fld_arc, failures to fld_err.

    Relies on module-level globals: log, futures, publisher, topic_path,
    fld_src, fld_pro, fld_arc, fld_err, fle_prefix. Runs forever.
    """
    log("INFO: starting application...")

    #//
    def get_callback(fut, file):
        # Build a completion callback bound to one file so the handler knows
        # which file to archive or quarantine when its publish settles.
        def callback(fut):
            try:
                log("INFO: response received for " + file + " " + fut.result())
                os.rename(os.path.join(fld_pro, file), os.path.join(fld_arc, file))
                futures.pop(file)
            # BUG FIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit; Exception is the right net here.
            except Exception:
                # BUG FIX: the format arguments were swapped — the message
                # printed the exception where the filename belongs.
                log('ERROR: error processing file {} for {}.'.format(
                    file, fut.exception()))
                os.rename(os.path.join(fld_pro, file), os.path.join(fld_err, file))
                futures.pop(file)

        return callback

    #//
    while True:
        #// ARE THERE ANY NEW FILES? MOVE THEM TO PROCESSING.
        for file in os.listdir(fld_src):
            if file.startswith(fle_prefix):
                log('INFO: found new file ' + file)
                os.rename(os.path.join(fld_src, file), os.path.join(fld_pro, file))
        time.sleep(.100)  # 100 ms pause between the scan and publish passes
        #// PROCESS FILES
        for file in os.listdir(fld_pro):
            if file.startswith(fle_prefix) and file not in futures:
                #// ADD THE FILE NAME TO THE EXPECTED RESPONSE QUEUE
                futures[file] = None
                msg = Path(os.path.join(fld_pro, file)).read_text()
                future = publisher.publish(topic_path, data=msg.encode('utf-8'))
                future.add_done_callback(get_callback(future, file))
                log("INFO: sent publish request for " + file)
        #// RETRY ERROR FILES
        #// CLEAN UP ARCHIVES
        time.sleep(5)