def index():
    if request.method == 'POST':
        if 'file' not in request.files:
            flash('No File Part')
            return redirect(request.url)
        file = request.files['file']
        if file.filename == '':
            flash('No selected File')
            return redirect(request.url)
        if file and allowed_filename(file.filename):
            filename = secure_filename(file.filename)
            filename_tokenized = file.filename.split('.')
            # Read the upload straight into a DataFrame based on its extension.
            if filename_tokenized[-1] == 'xlsx':
                data = pd.read_excel(file)
            else:
                data = pd.read_csv(file)
            # file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
            cleaned_data = main(data)
            classified_dataframe = model(cleaned_data)
            classified_dataframe.to_csv(filename_tokenized[0] + ".csv",
                                        index=False, encoding='utf-8')
            return render_template('final.html', title='All Done')
    return render_template('index.html', title='Predictor_App | Home')
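# The view above relies on an allowed_filename() helper that is not shown in
# this snippet. A minimal sketch of what such a check might look like,
# assuming a whitelist of extensions (ALLOWED_EXTENSIONS and its contents are
# assumptions, not part of the original code):
ALLOWED_EXTENSIONS = {'csv', 'xlsx'}

def allowed_filename(filename):
    # Accept only filenames whose extension is in the whitelist.
    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS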
def startProcess():
    allPaths = []
    args = []
    # Collect whichever path entry boxes were filled in.
    if ent1.get() != "":
        allPaths.append(ent1.get())
    if ent2.get() != "":
        allPaths.append(ent2.get())
    if ent3.get() != "":
        allPaths.append(ent3.get())
    if ent4.get() != "":
        allPaths.append(ent4.get())
    if ent5.get() != "":
        allPaths.append(ent5.get())
    for path in allPaths:
        args.append("-p")
        args.append(path)
    # Translate the checkbox state into command-line style flags.
    if filesVar.get():
        args.append("-f")
    if verboseVar.get():
        args.append("-v")
    if silentVar.get():
        args.append("-s")
    if testonlyVar.get():
        args.append("-t")
    args.append("-d")
    args.append(numericBox.get())
    # Capture everything cleaner.main() prints and show it in the output box.
    old_stdout = sys.stdout
    sys.stdout = mystdout = io.StringIO()
    output = cleaner.main(args)
    outEntry.insert(END, mystdout.getvalue())
    sys.stdout = old_stdout
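# A note on the stdout capture above: the manual sys.stdout swap is not
# exception-safe; if cleaner.main() raises, stdout stays redirected. A minimal
# sketch of the same capture using contextlib.redirect_stdout from the
# standard library, which restores stdout automatically (the helper name
# run_cleaner_and_capture is an illustration, not part of the original code):
import contextlib
import io

def run_cleaner_and_capture(args):
    buf = io.StringIO()
    with contextlib.redirect_stdout(buf):
        cleaner.main(args)
    return buf.getvalue()

# Usage in the GUI handler would then be:
#     outEntry.insert(END, run_cleaner_and_capture(args))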
def cleaner():
    print("cleaner function")
    # Import and reload so edits to cleaner.py take effect without restarting.
    import importlib
    import cleaner
    importlib.reload(cleaner)
    cleaner.main()
__doc__ = """Run the ingest logic for the server's database.

This calls cleaner, pusher and dashViewer in sequence."""

# core user lib
import cleaner
import pusher
import dashViewer as views

# Python core
import time
import os

cleaner.main()
pusher.main()
views.main()

# TODO: after this, run a git commit and push so GitHub reflects the latest data
filenames = [os.path.join(DATA_DIR, x) for x in os.listdir(DATA_DIR)
             if not os.path.isdir(os.path.join(DATA_DIR, x))]
print(filenames)

# Clean each raw file into CLEAN_DIR, remembering the output paths.
cleaned_file_paths = []
for filename in filenames:
    src = filename
    dst = os.path.join(CLEAN_DIR, os.path.basename(filename))
    cleaned_file_paths.append(dst)
    args = ['tmp', src, dst]
    if len(os.listdir(CLEAN_DIR)) < 3:
        cleaner.main(args)

# aggregate into a single file
print("***** AGGREGATING *****")
final_data_file = os.path.join(CLEAN_DIR, "simple.txt")
if not os.path.exists(final_data_file):
    with open(final_data_file, 'w', encoding='utf8') as all_data_file:
        for clean_file in tqdm(cleaned_file_paths):
            with open(clean_file, 'r', encoding='utf8') as individual_data_file:
                lines = individual_data_file.readlines()
                # keep a 50-line window starting at a random offset
                start_point = random.randint(0, len(lines))
                end_point = start_point + 50
                lines = lines[start_point:end_point]
                all_data_file.writelines(lines)
def test_cleaner(self):
    self.reset()
    cleaner.main()
    self.assertEqual(XmlSetBase.parse('upload-gold.xml'),
                     XmlSetBase.parse('upload-xml-test.xml'))
def scrubFile(self):
    self.scrubStatusLabelText.set('Scrubbing...')
    return self.scrubStatusLabelText.set(cleaner.main(self.directory, self.csvfile))
def main():
    scraper.main()
    parser.main()
    cleaner.main()
    word2vec.main()
    randomize_batches.main()
def clean_logs():
    cleaner.main()