import cv2
from time import sleep

# get_dir, cap_get_faces, show_face, save_img, main and main_loop are
# defined elsewhere in the source project.


def add_face_to_data():
    """Capture ten face crops from each camera and save them as training data."""
    cam_hd = cv2.VideoCapture(0)
    cam_ld = cv2.VideoCapture(1)
    face_cascade = cv2.CascadeClassifier('./haarcascade_frontalface_default.xml')
    dir_name = get_dir()
    i, k = 0, 0
    while True:
        gray_hd, gray_ld, faces_hd, faces_ld = cap_get_faces(cam_hd, cam_ld,
                                                             face_cascade)
        show_face(gray_hd, gray_ld, faces_hd, faces_ld)
        if len(faces_hd) > 0 and i < 10:
            x, y, w, h = faces_hd[0]
            save_img(gray_hd[y:y + h, x:x + w], dir_name)
            i += 1
        if len(faces_ld) > 0 and k < 10:
            x, y, w, h = faces_ld[0]
            save_img(gray_ld[y:y + h, x:x + w], dir_name, cam='ld')
            k += 1
        if i >= 10 and k >= 10:  # stop once both cameras have saved ten crops
            break
        print('HD picks:', i)
        print('LD picks:', k)
        sleep(1)
    cam_hd.release()
    cam_ld.release()
    main()


main_loop()
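# The helpers above are not shown on this page. As a rough illustration only,
# a cap_get_faces along these lines would match how the function is called
# (the body and the detectMultiScale parameters are assumptions, not the
# project's actual code):
def cap_get_faces(cam_hd, cam_ld, face_cascade):
    # grab one frame per camera, convert to grayscale, run the Haar cascade
    _, frame_hd = cam_hd.read()
    _, frame_ld = cam_ld.read()
    gray_hd = cv2.cvtColor(frame_hd, cv2.COLOR_BGR2GRAY)
    gray_ld = cv2.cvtColor(frame_ld, cv2.COLOR_BGR2GRAY)
    faces_hd = face_cascade.detectMultiScale(gray_hd, 1.3, 5)
    faces_ld = face_cascade.detectMultiScale(gray_ld, 1.3, 5)
    return gray_hd, gray_ld, faces_hd, faces_ld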
def main(): """ Run the west nile virus prediction, and intervention recommendation model. """ print('Running model...') process.main() # process data if not os.path.isfile( 'models/wnv_predict.pkl'): # check if trained model exists train.main() # train model if one does not exist predict.main() # generate wnv predictions recommend.main() # generate recommendations print('Done.')
def main(): """ Train prediction model """ print('Training model...') data_file = "processed_data/processed_train.csv" try: model_data = pd.read_csv(data_file) except: print('Processed data missing.\nProcessing raw data.') process.main() model_data = pd.read_csv(data_file) model_data = model_data.drop(['date', 'nummosquitos'], axis=1) y = model_data.pop('wnvpresent') X = model_data X_train, X_test, y_train, y_test = train_test_split(X, y) try: forest = joblib.load('models/wnv_predict.pkl') print('Existing model found.') except: forest = RandomForestClassifier(class_weight='balanced_subsample', n_jobs=-1, random_state=42) param_grid = dict(max_depth=np.random.randint(1, 10, 10), min_samples_split=np.random.sample(5), min_samples_leaf=np.random.sample(5)/2, min_weight_fraction_leaf=np.random.sample(5)/2, max_features=np.random.sample(5)) grid = GridSearchCV(forest, param_grid, cv=10, scoring='neg_log_loss', n_jobs=-1, verbose=True) grid.fit(X_train, y_train) forest = grid.best_estimator_ print(forest) joblib.dump(forest, 'models/wnv_predict.pkl') probabilities = forest.predict_proba(X) probs = pd.DataFrame(probabilities[:, 1], columns=['wnv_probablitiy']) probs.index.rename('Id', inplace=True) probs.index = probs.index + 1 probs.to_csv('data/prediction_probabilities.csv') print('Training complete.')
def get_country_clicks():
    auth_header = request.headers.get('Authorization')
    # check that an authorization header is included, then parse the token
    if auth_header:
        if 'Bearer' not in auth_header:
            return jsonify({
                'status': 'fail',
                'message': 'Token must be Bearer Token'
            })
        try:
            access_token = auth_header.split(' ')[1]
        except IndexError:
            return jsonify({'status': 'fail', 'message': 'Token malformed'})
    else:
        return jsonify({
            'status': 'fail',
            'message': 'Token empty. Please provide a valid bearer token'
        })
    country_id = None
    if 'country_id' in request.args:
        country_id = request.args['country_id']
    country_clicks = process_data.main(access_token,
                                       country_id=country_id)['response']
    return jsonify(country_clicks)
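# Example client call for the route above (the URL path and port are
# assumptions; the @app.route decorator is not part of this snippet):
import requests

resp = requests.get('http://localhost:5000/country_clicks',
                    headers={'Authorization': 'Bearer <access_token>'},
                    params={'country_id': '1'})
print(resp.json())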
def test_main_without_days(self, dir_path, data_path, input_path,
                           template_path, output_path, aws_session,
                           check_available_days, get_available_files,
                           output_dict, add_location_to_stop_data,
                           create_csv_data, write_info_to_kepler_file,
                           config):
    # the mock arguments are injected by @mock.patch decorators not shown here
    dir_path.return_value = self.data_path
    data_path.return_value = self.data_path
    input_path.return_value = self.data_path
    template_path.return_value = self.data_path
    output_path.return_value = self.data_path
    check_available_days.return_value = []

    # with no available days the script should exit with status 1
    with self.assertRaises(SystemExit) as cm:
        process_data.main(
            ['process_data', '2020-05-08', '2020-05-08', 'output'])
    self.assertEqual(cm.exception.code, 1)
def test_main(self, dir_path, data_path, input_path, template_path,
              output_path, aws_session, check_available_days,
              get_available_files, output_dict, add_location_to_stop_data,
              add_location_to_metro_data,
              add_location_to_metrotren_station_data, create_csv_data,
              write_info_to_kepler_file, config):
    # the mock arguments are injected by @mock.patch decorators not shown here
    dir_path.return_value = self.data_path
    data_path.return_value = self.data_path
    input_path.return_value = self.data_path
    template_path.return_value = self.data_path
    output_path.return_value = self.data_path
    check_available_days.return_value = [
        datetime.strptime('2020-02-05', "%Y-%m-%d")
    ]
    output_dict.return_value = [
        mock.MagicMock(), mock.MagicMock(), mock.MagicMock()
    ]
    process_data.main(['process_data', '2020-05-08', '2020-05-08', 'output'])
import os
import pickle

import numpy as np
from scipy.sparse import csr_matrix, load_npz


def main():
    process_data.main()
    sparse_user_item = load_npz("./output/sparse_user_item.npz")
    train_data, test_data, users_altered = test_train_split(sparse_user_item)
    # train_model expects an item-user matrix, hence the transpose
    als_model, user_vecs, item_vecs = train_model(train_data.T)
    print("implicit_recomm_auc,popularity_auc",
          evaluate_model(train_data, users_altered,
                         [csr_matrix(user_vecs), csr_matrix(item_vecs.T)],
                         test_data))
    directory = './output'
    if not os.path.exists(directory):
        os.makedirs(directory)
    np.save('./output/item_vecs', item_vecs)
    np.save('./output/user_vecs', user_vecs)
    with open('./output/als_model', 'wb') as file:
        pickle.dump(als_model, file)
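# test_train_split above comes from the source project. A common masking-style
# split for implicit feedback looks roughly like this (a sketch under that
# assumption, not the project's actual code): hide a fraction of nonzero
# interactions in the training copy and keep the full matrix for evaluation.
def test_train_split(user_item, pct_mask=0.2, seed=42):
    rng = np.random.default_rng(seed)
    test = user_item.copy()
    train = user_item.copy().tolil()  # LIL allows cheap element assignment
    rows, cols = user_item.nonzero()
    idx = rng.choice(len(rows), size=int(len(rows) * pct_mask), replace=False)
    train[rows[idx], cols[idx]] = 0  # mask the sampled interactions
    return csr_matrix(train), test, np.unique(rows[idx])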
def main():
    train_features, test_features, train_targets, test_targets = \
        process_data.main()
    tree_acc = tree_learner(train_features, test_features, train_targets,
                            test_targets)
    svm_acc = svm_learner(train_features, test_features, train_targets,
                          test_targets)
    nb_acc = nb_learner(train_features, test_features, train_targets,
                        test_targets)
    print(tree_acc)  # 0.9809
    print(svm_acc)  # 0.9826
    print(nb_acc)  # 0.9287
def main():
    # output_dir = "./output_wmgm"  # TODO: be able to set with argument
    file_output = "results_all.csv"  # csv output
    # fdata2 = "data2.nii.gz"
    # get the list of csv metadata files in folder1
    folder1, folder2 = folder_data
    fname_csv_list = sorted(glob.glob(os.path.join(folder1, "*.csv")))
    # initialize dataframe; columns match the fields appended below
    results_all = pd.DataFrame(columns=['WM', 'GM', 'Noise', 'Smooth',
                                        'SNR_single', 'SNR_diff', 'Contrast'])
    # loop and process
    for fname_csv in fname_csv_list:
        # read the per-file metadata csv into a dict
        metadata = pd.read_csv(fname_csv, header=None,
                               index_col=0).squeeze('columns').to_dict()
        file_data = metadata["File"]
        # get the fname of each nifti file
        fname1 = os.path.join(folder1, file_data)
        fname2 = os.path.join(folder2, file_data)
        # display
        print("\nData #1: " + fname1)
        print("Data #2: " + fname2)
        # process the pair of data
        results = process_data.main([fname1, fname2], file_seg, file_gmseg,
                                    register=register, verbose=verbose)
        # append to dataframe
        results_all = results_all.append(
            {'WM': metadata['WM'],
             'GM': metadata['GM'],
             'Noise': metadata['Noise'],
             'Smooth': metadata['Smooth'],
             'SNR_single': results.loc['SNR_single'][0],
             'SNR_diff': results.loc['SNR_diff'][0],
             'Contrast': results.loc['Contrast'][0]},
            # 'Sharpness': results.loc['Sharpness'][0],
            ignore_index=True)
    results_all.to_csv(file_output)
def main(new_exp, p_raw_files, raw_f_subfold, p_stages, p_ref_csv_files,
         wrk_dir, f_vars_to_extract, f_pattern_ref, tests, spinup, lclean,
         ltestsuite, lverbose):

    # init logger
    logger_config.init_logger(lverbose, __file__)
    log.banner('Start sanity checker')

    # make all user-supplied paths absolute
    wrk_dir = utils.abs_path(wrk_dir)
    p_stages = utils.abs_path(p_stages)
    p_ref_csv_files = utils.abs_path(p_ref_csv_files)
    f_pattern_ref = utils.abs_path(f_pattern_ref)

    # create directories
    os.makedirs(p_stages, exist_ok=True)
    os.makedirs(wrk_dir, exist_ok=True)

    # go to working directory
    os.chdir(wrk_dir)
    log.info('Working directory is {}'.format(wrk_dir))

    # data processing takes a while; check that no step is done twice
    actions = utils.determine_actions_for_data_processing(
        new_exp, tests, p_stages, lclean)

    # create dataframe out of raw data
    results_data_processing = process_data.main(
        new_exp,
        actions,
        tests,
        spinup,
        p_raw_files=p_raw_files,
        p_stages=p_stages,
        raw_f_subfold=raw_f_subfold,
        f_vars_to_extract=f_vars_to_extract,
        f_pattern_ref=f_pattern_ref)

    results_test, references = perform_test.main(
        new_exp,
        results_data_processing=results_data_processing,
        p_stages=p_stages,
        tests=tests,
        p_ref_csv_files=p_ref_csv_files,
        ltestsuite=ltestsuite,
        f_vars_to_extract=f_vars_to_extract)

    if 'welch' in tests:
        test = 'welch'
        plt.plt_welchstest(
            references[test].append(results_data_processing[test], sort=False),
            new_exp,
            results_test[test],
            p_stages=p_stages)

    # Add experiment to the reference pool
    # --------------------------------------------------------------------
    log.banner('')
    log.banner('Check results again before adding to reference pool')
    log.banner('')
    for test in tests:
        test_cfg = test_config.get_config_of_current_test(test)
        utils.print_warning_if_testresult_is_bad(test, results_test[test],
                                                 test_cfg.metric_threshold,
                                                 test_cfg.metric)

    if ltestsuite:
        asw = 'YES'
    else:
        asw = input('If you are happy with this experiment, '
                    'do you want to add it to the reference pool? '
                    '(yes/[No])\n')

    if asw.strip().upper() in ('YES', 'Y'):
        add_exp_to_ref.main(new_exp,
                            tests,
                            p_stages=p_stages,
                            ltestsuite=ltestsuite,
                            p_ref_csv_files=p_ref_csv_files)
    else:
        args_for_manual_execution = \
            utils.derive_arguments_for_add_exp_to_ref(new_exp, tests,
                                                      p_stages,
                                                      p_ref_csv_files)
        log.info('The experiment {} is NOT added to '
                 'the reference pool \n'.format(new_exp))
        log.info('If you want to add the experiment {} '
                 'to the reference pool later on, type '
                 'the following line when you are ready:'.format(new_exp))
        log.info('')
        log.info('python add_exp_to_ref.py {}'.format(
            args_for_manual_execution))

    log.banner('')
    log.banner('Sanity test finished')
    log.banner('')
    # (tail of the script's main(): download each file, then reformat)
    for filename in files:
        fetch(address, filename, version, HTTPBasicAuth(username, password))
    reformat_data(version)


if __name__ == "__main__":
    parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "version",
        nargs="?",
        help=("Experiment version. This corresponds to the "
              "experiment_code_version parameter in the psiTurk config.txt "
              "file that was used when the data was collected."))

    c = configparser.ConfigParser()
    c.read('config.txt')
    sp = c['Server Parameters']
    url = 'https://' + sp['adserver_revproxy_host'] + '/data'

    version = parser.parse_args().version
    if version is None:
        # fall back to the current version from config.txt
        version = c["Task Parameters"]["experiment_code_version"]
        print("Fetching data for current version: ", version)

    main(version, url, sp['login_username'], sp['login_pw'])

    sys.path.append('bin')
    import process_data
    process_data.main(version)
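# For reference, the config.txt shape this block reads (section and key names
# are taken from the code above; the values are placeholders):
#
# [Server Parameters]
# adserver_revproxy_host = example.org
# login_username = someuser
# login_pw = somepassword
#
# [Task Parameters]
# experiment_code_version = 1.0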
    # (inside the script's recording loop, after the GPS has been initialized)
    if now.minute % burst_int == 0 and now.second == 0:
        logger.info("starting burst")
        # create file name
        fname = dataDir + floatID + '_GPS_' + \
            "{:%d%b%Y_%H%M%SUTC.dat}".format(datetime.utcnow())
        logger.info("file name: %s" % fname)
        # call record_gps
        u, v, z, lat, lon = record_gps(ser, fname)
        # check that the burst completed with 2048 points
        try:
            if os.path.isfile(fname) and os.path.getsize(fname) > 0:
                # call the data processing script
                logger.info('starting to process data')
                print(u.shape)
                process_data.main(u, v, z, lat, lon)
            else:
                logger.info('data file does not exist or does not '
                            'contain enough data for processing')
        except OSError as e:
            logger.info(e)
            sys.exit(1)
    else:
        sleep(0.25)
else:
    logger.info("GPS not initialized, exiting")
    sys.exit(1)
import process_data
from elasticsearch import Elasticsearch, helpers

# load data
recipes_dict, ingredients_df = process_data.main()

# Elasticsearch should be up and running on localhost:9200
es = Elasticsearch(host="localhost", port=9200)

# build one indexing action per recipe
actions = [{
    "_index": "recipe_index",
    "_id": recipes_dict['id'][i],
    "Url": recipes_dict['Url'][i],
    "Instructions": recipes_dict['Instructions'][i],
    "Ingredients": recipes_dict['Ingredients'][i],
    "Ingredients_unique": ingredients_df[
        ingredients_df.id == recipes_dict['id'][i]].ingredient.values.tolist(),
    "Name": recipes_dict['Name'][i]
} for i in range(0, len(recipes_dict['Url'].keys()))]

# bulk insert into the index
helpers.bulk(es, actions)

# view the data:
# localhost:9200/recipe_index/_search?pretty
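# A follow-up query against the new index (the field name is taken from the
# actions above; the search term is an arbitrary example):
res = es.search(index="recipe_index",
                body={"query": {"match": {"Ingredients_unique": "garlic"}}})
print(res["hits"]["hits"][:3])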
# get and transform the user's resume or LinkedIn profile
user_input = st.text_area("copy and paste your resume or linkedin here", '')
user_input = str(user_input)
user_input = re.sub(r'[^a-zA-Z0-9.]', ' ', user_input)
user_input = user_input.lower()
user_input = pd.Series(user_input)

# load NLP + classification models
topic_model = pickle.load(open('topic_model.sav', 'rb'))
classifier = pickle.load(open('classification_model.sav', 'rb'))
vec = pickle.load(open('job_vec.sav', 'rb'))

classes, prob = pda.main(user_input, topic_model, classifier, vec)
data = pd.DataFrame(zip(classes.T, prob.T), columns=['jobs', 'probability'])


# plot the probability of the user belonging to each job class
def plot_user_probability():
    # plt.figure(figsize=(2.5, 2.5))
    plt.barh(data['jobs'], data['probability'], color='r')
    plt.title('Percent Match of Job Type')
    st.pyplot()


# plot where the user fits in with other job clusters
def plot_clusters():
    st.markdown(