def process_single_day(day, data_dir):
    """Process the vehicle-position CSV recorded for a single day.

    ``day`` is used verbatim as the CSV file stem under
    ``<data_dir>/vehicle_positions`` and is also parsed with ``arrow.get``
    to look up the matching ``stops`` and ``schedule`` metadata.

    Returns whatever ``process_day`` returns for that day's data.
    """
    parsed_day = arrow.get(day)
    csv_path = os.path.join(data_dir, 'vehicle_positions', day + '.csv')
    stops, schedule = (
        get_metadata(parsed_day, kind, data_dir)
        for kind in ('stops', 'schedule')
    )
    print('Processing file:', csv_path)
    positions = pd.read_csv(csv_path)
    return process_day(positions, stops, schedule)
def fetch_image_meta(paths=None):
    """Return an ``{imageName: 1}`` lookup built from master-DB metadata.

    When ``paths`` is truthy, only records whose ``path`` is in ``paths``
    are requested; otherwise the metadata query is unfiltered.
    """
    query = {'f': {'path': {'$in': paths}}} if paths else {}
    records = get_metadata(master_db=True, **query)
    return dict.fromkeys((rec['imageName'] for rec in records), 1)
def put(self):
    '''
    Add a new analytic via file upload. This is a security risk.

    The uploaded ``file`` is read from the request, saved under
    ``ANALYTICS_OPALS`` with a ``.py`` suffix, and the metadata returned by
    ``utils.get_metadata`` for it is inserted into the analytics collection.

    Returns:
        ``(metadata, 201)`` on success, ``(message, 415)`` when the file
        extension is not in ``ALLOWED_EXTENSIONS``, or
        ``(traceback_text, 406)`` when any other step fails.
    '''
    try:
        now = datetime.now()
        upload = request.files['file']
        filename = secure_filename(upload.filename)
        name, ext = splitext(filename)
        if ext not in ALLOWED_EXTENSIONS:
            return 'This filetype is not supported.', 415

        # Make the id more meaningful by appending the upload time.
        # NOTE(review): the components are not zero-padded, so ids are not
        # fixed-width (e.g. month 1 + day 11 reads the same as month 11 +
        # day 1). Left unchanged to keep the id format stable.
        stamp = ''.join(
            str(part) for part in (now.year, now.month, now.day,
                                   now.hour, now.minute, now.second))
        analytic_id = name + stamp

        # save the file
        filepath = ANALYTICS_OPALS + analytic_id + '.py'
        upload.save(filepath)

        # get the metadata from the file
        metadata = utils.get_metadata(analytic_id)
        metadata['analytic_id'] = analytic_id
        _, col = analytics_collection()
        col.insert(metadata)
        meta = drop_id_key(metadata)
    except Exception:
        # Report the failure to the client; unlike a bare ``except`` this
        # lets SystemExit / KeyboardInterrupt propagate.
        tb = traceback.format_exc()
        return tb, 406
    return meta, 201
def track_changed(self):
    """Handle a track change: scrobble the previous track, announce the new one.

    Does nothing when ``self.on`` is falsy. When this is not the active
    playback device, returns the result of ``self.pause()``. Otherwise the
    previous track is scrobbled if it was played long enough, the play
    counter is reset, and Last.fm's "now playing" is updated from fresh
    metadata.
    """
    if not self.on:
        return
    if not bool(lib.SpPlaybackIsActiveDevice()):
        return self.pause()

    # Scrobble last song only if the song has been played more than half
    # of its duration or during more than 4 minutes
    # (presumably ``duration`` is in milliseconds and ``play_cumul`` in
    # seconds -- TODO confirm).
    if self.metadata and self.play_cumul > min(
            self.metadata["duration"] / 2000, 240):
        self.lastfm_network.scrobble(
            artist=self.metadata["artist_name"],
            title=self.metadata["track_name"],
            timestamp=int(self.metadata["time_on"]),
            album=self.metadata["album_name"],
            duration=(self.metadata["duration"] / 1000))
        # Single-argument call form prints identically on Python 2 and 3.
        print("LastFM: scrobbled track " + self.metadata["track_name"] +
              " - " + self.metadata["artist_name"])

    # Update now playing song
    self.play_cumul = 0
    self.play()
    self.metadata = get_metadata()
    self.metadata["time_on"] = time.time()
    self.lastfm_network.update_now_playing(
        artist=self.metadata["artist_name"],
        title=self.metadata["track_name"],
        album=self.metadata["album_name"],
        duration=int(self.metadata["duration"] / 1000))
def get_data_matrix(cls, feature, label=None, unlabelled=False,
                    ignore_metadata=False):
    """Build the min-max scaled data matrix for ``feature``.

    When ``label`` is truthy, the vectors are restricted to the image paths
    returned by ``utils.filter_images(label)``.

    Returns a tuple ``(images, meta, matrix)``. With ``ignore_metadata`` the
    matrix is just the scaled feature vectors and ``meta`` maps each image
    path to its metadata record; otherwise the scaled metadata columns from
    ``cls.get_metadata_space`` are stacked to the right of the features.
    """
    scaler = MinMaxScaler()
    query = {'path': {'$in': utils.filter_images(label)}} if label else {}

    # Build and scale feature matrix
    images, vectors = utils.get_all_vectors(feature, f=query,
                                            unlabelled_db=unlabelled)
    vectors = scaler.fit_transform(vectors)

    # Not including metadata boosts accuracy of Set 2
    # Including metadata boosts accuracy of Set 1
    if not ignore_metadata:
        # Build and scale the metadata matrix, then column-stack it.
        meta, meta_vectors = cls.get_metadata_space(
            images, unlabelled_db=unlabelled)
        meta_vectors = scaler.fit_transform(meta_vectors)
        return images, meta, np.c_[vectors, meta_vectors]

    # Mapping between image file path name and the metadata
    records = utils.get_metadata(unlabelled_db=unlabelled)
    by_path = {rec['path']: rec for rec in records}
    return images, by_path, vectors
def metadata():
    """Return the metadata for the configured elastic server as JSON.

    The server address is taken from ``args.elasticIp`` and
    ``args.elasticPort``; both are printed before the lookup.
    """
    print(
        f'The elastic server ip address is {args.elasticIp} elastic server port is {args.elasticPort}'
    )
    # Named ``result`` so the local does not shadow this function's own name.
    result = get_metadata(args.elasticIp, args.elasticPort)
    return jsonify(result)
def get_unlabelled_data(feature):
    """Return ``(images, meta, vectors)`` for the unlabelled database.

    ``meta`` maps each record's ``path`` to the full metadata record.
    """
    images, vectors = utils.get_all_vectors(feature, unlabelled_db=True)

    by_path = {}
    for rec in utils.get_metadata(unlabelled_db=True):
        by_path[rec['path']] = rec
    return images, by_path, vectors
def get_filename(path, ext):
    """Derive a ``<timestamp>-<md5>.<ext>`` file name for the file at ``path``.

    The MD5 is taken over the file's line at index 100 (the 101st line) and
    the timestamp comes from the ``created`` entry of ``get_metadata(f)``.

    Returns:
        ``(filename, created)``.

    Raises:
        IndexError: if the file has fewer than 101 lines.
    """
    with open(path, 'r') as f:
        line = f.readlines()[100]
        # hashlib only accepts bytes; a text-mode line is ``str`` on
        # Python 3, so encode it first (no-op on Python 2 where str is bytes).
        if not isinstance(line, bytes):
            line = line.encode('utf-8')
        md5 = hashlib.md5(line).hexdigest()
        # Rewind so the metadata reader sees the file from the start.
        f.seek(0)
        created = get_metadata(f)['created']
    timestamp = int(time.mktime(created.timetuple()))
    filename = '%s-%s' % (timestamp, md5) + '.' + ext
    return filename, created
def get_labelled_data(feature):
    """Return ``(images, meta, feature_space)`` for the labelled database.

    ``meta`` maps each record's ``path`` to the full metadata record.
    """
    l_images, feature_space = utils.get_all_vectors(feature)
    meta = dict((rec['path'], rec) for rec in utils.get_metadata())
    return l_images, meta, feature_space
def process_range(start, end, data_dir):
    """Process every day between ``start`` and ``end`` and write one CSV.

    For each date yielded by ``date_range`` the day's vehicle-position CSV
    is read from ``<data_dir>/vehicle_positions`` and passed to
    ``process_day`` with that day's stops and schedule. The per-day results
    are concatenated and written to ``<start>_<end>.csv`` in the current
    working directory.
    """
    dates = date_range(arrow.get(start), arrow.get(end))
    print('Processing dates from {} to {}'.format(start, end))
    template = os.path.join(data_dir, 'vehicle_positions') + '/{}.csv'

    frames = []
    for raw_day in dates:
        fpath, day = template.format(raw_day), arrow.get(raw_day)
        stops = get_metadata(day, 'stops', data_dir)
        schedule = get_metadata(day, 'schedule', data_dir)
        started = arrow.now()
        print('Processing file:', fpath)
        frames.append(process_day(pd.read_csv(fpath), stops, schedule))
        elapsed = (arrow.now() - started).seconds
        print('Process {} in {}s'.format(day, elapsed))

    pd.concat(frames).to_csv('{}_{}.csv'.format(start, end), index=False)
def __init__(self,
             basedir,
             metadata_file=None,
             verbose=False,
             overwrite=False,
             dataurl=None,
             testing=False):
    """Set up the directory layout, original images and team metadata.

    Args:
        basedir: base directory, stored and passed on to ``NarpsDirs``.
        metadata_file: path to the metadata file; when ``None`` it defaults
            to ``analysis_pipelines_for_analysis.xlsx`` in the ``orig``
            directory.
        verbose: stored on the instance as ``self.verbose``.
        overwrite: stored on the instance as ``self.overwrite``.
        dataurl: forwarded to ``NarpsDirs``.
        testing: stored on the instance and forwarded to ``NarpsDirs``.
    """
    self.basedir = basedir
    self.dirs = NarpsDirs(basedir, dataurl=dataurl, testing=testing)
    self.verbose = verbose
    self.teams = {}
    self.overwrite = overwrite
    self.started_at = datetime.datetime.now()
    self.testing = testing

    # create the full mask image if it doesn't already exist
    if not os.path.exists(self.dirs.full_mask_img):
        print('making full image mask')
        self.mk_full_mask_img(self.dirs)
    # NOTE(review): assert is stripped under ``python -O``.
    assert os.path.exists(self.dirs.full_mask_img)

    # get input dirs for orig data
    self.image_jsons = None
    self.input_dirs = self.get_input_dirs(self.dirs)

    # check images for each team
    # (complete_image_sets starts empty; presumably get_orig_images fills
    # it, since it is indexed by image type right below -- TODO confirm)
    self.complete_image_sets = {}
    self.get_orig_images(self.dirs)
    for imgtype in ['thresh', 'unthresh']:
        log_to_file(
            self.dirs.logfile,
            'found %d teams with complete original %s datasets' %
            (len(self.complete_image_sets[imgtype]), imgtype))

    # set up metadata
    if metadata_file is None:
        self.metadata_file = os.path.join(
            self.dirs.dirs['orig'], 'analysis_pipelines_for_analysis.xlsx')
    else:
        self.metadata_file = metadata_file

    self.metadata = get_metadata(self.metadata_file)

    # empty table, one column per hypothesis-level statistic
    self.hypothesis_metadata = pandas.DataFrame(
        columns=['teamID', 'hyp', 'n_na', 'n_zero'])

    # placeholders, initialised to None here
    self.all_maps = {
        'thresh': {
            'resampled': None
        },
        'unthresh': {
            'resampled': None
        }
    }
    self.rectified_list = []
def upload_album(self):
    """Create an album and upload every matching image from ``self.args.path``.

    Album metadata comes from ``get_metadata(True)`` when
    ``self.args.metadata`` is set, otherwise from ``self.metadata``. Files
    are matched per extension in ``file_extensions``. Returns the new
    album's ``id``.
    """
    if self.args.metadata:
        album_data = get_metadata(True)
    else:
        album_data = self.metadata

    album = self.client.create_album(album_data)
    print('Created album named "{}"'.format(album_data.get('title')))
    self.log_upload(album)

    # Images are attached via the album id when authenticated, otherwise
    # via its deletehash.
    if self.client.auth:
        album_id = album['id']
    else:
        album_id = album['deletehash']

    # get all images in the folder with approved file extensions
    files = [
        match
        for ext in file_extensions
        for match in glob(os.path.join(self.args.path, '*' + ext))
    ]

    for f in files:
        print('Uploading {}'.format(os.path.basename(f)))
        img_data = get_metadata() if self.args.metadata else dict()
        self.upload_pic(f, img_data, album_id)

    # return album if more data is needed
    return album['id']
def get_metadata_space(images):
    """Return ``(meta, space)`` for ``images`` using the master database.

    ``meta`` maps image path to its metadata record. ``space`` has one row
    per image with the columns age, gender, skinColor, accessories,
    nailPolish and irregularities; gender, skinColor and accessories are
    translated through the module-level ``mapping``.
    """
    # Mapping between image file path name and the metadata
    meta = {rec['path']: rec for rec in get_metadata(master_db=True)}

    def as_row(path):
        rec = meta[path]
        return [
            rec['age'],
            mapping[rec['gender']],
            mapping[rec['skinColor']],
            mapping[rec["accessories"]],
            rec["nailPolish"],
            rec["irregularities"],
        ]

    space = np.array([as_row(path) for path in images])
    return meta, space
def ranking_item():
    """
    Rank the items based on their predicted ratings

    For a POST request the JSON body supplies ``uid`` and ``iids``; any
    other method renders ``candidate.html``.

    Outputs:
    -------
        : JSON string with predicted ratings, inference time, ordered item
          ids and item metadata for the top 10 items
    """
    if request.method != 'POST':
        return render_template('candidate.html')

    ids = request.json
    user_id = int(ids['uid'])

    # ``iids`` is a string whose first and last characters are dropped
    # before splitting on commas (presumably "[1,2,3]" -- TODO confirm).
    item_ids = np.array(ids['iids'][1:-1].split(',')).astype(int)

    # One user-id row per candidate item. This used to be hard-coded to
    # 100 rows, which only lines up when exactly 100 items are posted.
    user_ids = np.full(len(item_ids), user_id)

    texts_u = [u_text[i].tolist() for i in user_ids]
    texts_i = [i_text[j].tolist() for j in item_ids]

    user_ids = user_ids.reshape(-1, 1)
    item_ids = item_ids.reshape(-1, 1)

    # Feed the inputs to the Tensorflow Serving model
    res, time_dif = tf_serving(texts_u, texts_i, user_ids, item_ids)

    # Get the ranking results (highest predicted rating first)
    rating = np.array(res['final_rating/add_1:0']).reshape(-1)
    order = np.argsort(rating)[::-1]
    item_ids_new = item_ids.reshape(-1)[order]
    rating_new = rating[order]

    # Prepare the metadata for 10 suggested items
    des_meta, title_meta, price_meta, imurl_meta, categ_meta = get_metadata(
        df_meta, item_ids_new, num_top=10)

    return json.dumps({
        'rating': rating_new.tolist(),
        'infertime': time_dif.total_seconds(),
        'item_ids': item_ids_new.tolist(),
        'des_meta': des_meta,
        'title_meta': title_meta,
        'price_meta': price_meta,
        'imurl_meta': imurl_meta,
        'categ_meta': categ_meta
    })
def read_metadata(pdf_path, document_uuid, document_name):
    """Bundle the metadata of ``pdf_path`` with its document identifiers.

    Returns a dict with ``original_document``, ``metadata``,
    ``document_uuid`` and ``document_name``. If anything raises, the
    exception is passed to ``sentry_client`` and ``None`` is returned.
    """
    try:
        extracted = get_metadata(pdf_path)
        return {
            'original_document': pdf_path,
            'metadata': extracted,
            'document_uuid': document_uuid,
            'document_name': document_name,
        }
    except Exception as e:
        sentry_client(e)
    return None
def load_data(num_chord_comp=5, num_grain_comp=5):
    """Load grain PCA scores, chord PCA scores and integer class labels.

    Returns ``(grain, chords, classes)`` where the first two are truncated
    to their first ``num_grain_comp`` / ``num_chord_comp`` columns and
    ``classes`` holds ``int(class_num)`` for every metadata entry.
    """
    # Grain data
    grain_scores = np.load(stats_pca_path() + 'grain_grain_pca_scores.npy')

    # Chord data
    chord_scores = get_chords_pca(load_chords(cord_length_path()),
                                  use_avg=True)

    # Labels (the class/subclass maps are not needed here)
    metadata, _class_map, _subclass_map = get_metadata(stats_files())
    classes = np.array([int(item['class_num']) for item in metadata])

    grain_part = grain_scores[:, :num_grain_comp]
    chord_part = chord_scores[:, :num_chord_comp]
    return (grain_part, chord_part, classes)
def get_metadata_space(cls, images, unlabelled_db=False):
    """Return ``(meta, space)`` for ``images``.

    ``meta`` maps image path to its metadata record. ``space`` has one row
    per image with the columns age, gender, skinColor, accessories,
    nailPolish and irregularities; gender, skinColor and accessories are
    translated through ``cls.mapping``.
    """
    # Mapping between image file path name and the metadata
    by_path = {}
    for rec in utils.get_metadata(unlabelled_db=unlabelled_db):
        by_path[rec['path']] = rec

    rows = []
    for path in images:
        rec = by_path[path]
        rows.append([
            rec['age'],
            cls.mapping[rec['gender']],
            cls.mapping[rec['skinColor']],
            cls.mapping[rec["accessories"]],
            rec["nailPolish"],
            rec["irregularities"],
        ])
    return by_path, np.array(rows)
def get_articles(input):
    """Collect article metadata for ``input`` via ``utils.get_metadata``.

    Three callbacks are supplied: one that builds the initial single
    placeholder ``Article``, one that returns that article's image list,
    and one that builds an ``ArticleImage`` for a file.
    """
    def make_root():
        return [Article(id=-1, chain_id='', filename='', title='',
                        images=[])]

    def images_of(x):
        return x[0].images

    def make_image(id, chain_id, filename):
        # The incoming chain_id is ignored; every image gets a fresh UUID.
        return ArticleImage(id=id,
                            chain_id=str(uuid.uuid4()),
                            filename=filename,
                            regions=[],
                            title='',
                            page=0,
                            idx_on_page=0)

    # ItJim: the callbacks above were added because this call previously
    # failed for lack of parameters.
    return utils.get_metadata(input, utils.image_ext, make_root, images_of,
                              make_image)
def get_full_matrix(feature, unlabelled=False, master=False):
    """Return ``(images, meta, matrix)`` with metadata columns appended.

    ``matrix`` is the feature vectors for ``feature`` with six metadata
    columns (age, gender, skinColor, accessories, nailPolish,
    irregularities) stacked on the right; ``meta`` maps image path to its
    metadata record.
    """
    images, vectors = get_all_vectors(feature,
                                      unlabelled_db=unlabelled,
                                      master_db=master)

    records = get_metadata(unlabelled_db=unlabelled, master_db=master)
    meta = {rec['path']: rec for rec in records}

    def encode(rec):
        return [
            rec['age'],
            mapping[rec['gender']],
            mapping[rec['skinColor']],
            mapping[rec["accessories"]],
            rec["nailPolish"],
            rec["irregularities"],
        ]

    meta_space = np.array([encode(meta[path]) for path in images])
    return images, meta, np.c_[vectors, meta_space]
def function_create():
    """Create a function from the uploaded zip and its JSON metadata.

    Runs inside ``utils.AtomicRequest``; each failure path sets
    ``atomic.errors`` before returning an error response. On success the
    new function id is returned with status 201.
    """
    with utils.AtomicRequest() as atomic:
        function_id = uuid.uuid4().hex
        atomic.driver_endpoint = driver_endpoint

        user, tenant = utils.get_headers(request)

        archive = utils.get_zip(request)
        zip_url = utils.upload_zip(function_id, archive)
        if not zip_url:
            atomic.errors = True
            return critical_error('Not able to store zip.')
        atomic.zip_url = zip_url

        metadata = utils.get_metadata(request)
        schema_ok = utils.validate_json(utils.build_schema, metadata)
        if not schema_ok:
            atomic.errors = True
            return bad_request("Error validating json.")

        tag = "{0}_{1}_{2}".format(tenant, user, metadata.get('name'))
        payload = {
            "memory": metadata.get('memory'),
            "tags": [tag],
            "runtime": metadata.get('runtime'),
            "zip_location": zip_url,
            "name": metadata.get('name')
        }

        image_id = utils.create_image(driver_endpoint, payload)
        atomic.image_id = image_id

        created = utils.create_function(tenant, user, function_id, image_id,
                                        zip_url, tag, metadata)
        if not created:
            atomic.errors = True
            return critical_error('Error building the function.')

        return Response(function_id, status=201)
def post(self):
    """Store the slide metadata for the Drive URL posted in the request body.

    The body is JSON with a ``driveurl`` key. The matching ``Presentation``
    is looked up by drive id (and created if missing), its ``slides`` field
    is set to the JSON-encoded result of ``get_metadata``, and the stored
    entity's id is written back as ``{"presentation_id": "<id>"}``.
    """
    body = json.loads(self.request.body)
    drive_url = body['driveurl']
    logging.info("Received the drive url: %s", drive_url)

    drive_id = self.parse_url(drive_url)
    query = Presentation.query(Presentation.drive_id == drive_id)
    presentation = query.get()
    if presentation is None:
        presentation = Presentation(drive_id=drive_id)

    slides_str = json.dumps(get_metadata(drive_id))
    logging.info(slides_str)
    presentation.slides = slides_str

    presentation_id = presentation.put().id()
    reply = {
        'presentation_id': str(presentation_id),
    }
    self.response.write(json.dumps(reply))
def play(url, nid):
    """Resolve the stream for ``nid`` and hand it to the XBMC player.

    For ``nid == 'live'`` the metadata is checked first; if it carries an
    ``error_msg`` the error is logged, shown in a dialog, and playback is
    abandoned. Any exception raised while starting playback is reported
    through an error dialog instead of propagating.
    """
    utils.log('play: ' + urllib.quote(url))

    if nid == 'live':
        meta = utils.get_metadata(nid)

        # this is usually because the live stream isn't currently active
        if 'error_msg' in meta:
            utils.log('cannot play stream: %s, %s' % (url, meta['error_msg']))
            utils.dialog_error(meta['error_msg'])
            return

    # XXX disabled as not currently working?
    # permission dance. if we're already logged in (have a valid cookie),
    # no need to log in again
    # perms = utils.get_perms(nid)
    # if not perms:
    #     # login and recheck video permissions
    #     if not utils.wsbk_login():
    #         return
    #     perms = utils.get_perms(nid)
    #     if not perms:
    #         # we really mustn't have permission
    #         utils.log('no permission for video %s' % nid)
    #         utils.dialog_error('No permission to access this video. Check login details in plugin settings.')
    #         return

    (stream_url, meta) = utils.get_stream_url(nid)

    listitem = xbmcgui.ListItem(label=meta['title'],
                                iconImage=meta['thumbnail_url'],
                                thumbnailImage=meta['thumbnail_url'])

    utils.log("Playing stream: %s" % stream_url)

    try:
        xbmc.Player().play(stream_url, listitem)
    except Exception:
        # Narrower than a bare ``except``: SystemExit / KeyboardInterrupt
        # still propagate.
        utils.dialog_error("Cannot play video")
def plot_y():
    """Plot per-column histograms and convergence counts of ``y``.

    For each training-episode count (500, 1000, 3000) one row of density
    histograms is drawn, one per column of ``y``, and a count plot shows
    how many values per column are greater than zero. Both figures are
    displayed with ``plt.show()``.
    """
    fig, axes = plt.subplots(3, 4, sharex=True)
    fig_2, axes_2 = plt.subplots(3, figsize=(9, 10))

    for row, num_train_episodes in enumerate([500, 1000, 3000]):
        _, y = utils.get_metadata(num_train_episodes=num_train_episodes,
                                  artificial=False)

        # One density histogram per column of y.
        for col in np.arange(y.shape[1]):
            hist_ax = axes[row][col]
            hist_ax.set_title(num_train_episodes)
            sns.histplot(y.iloc[:, col],
                         ax=hist_ax,
                         stat="density",
                         bins=8,
                         palette="deep")
            hist_ax.set_ylabel("Densidade")

        # Long-format table: one row per (column, value) with a flag for
        # values greater than zero.
        flat_values = y.values.T.flatten()
        converged = pd.DataFrame.from_dict({
            "Algoritmo": np.repeat(y.columns, y.shape[0]),
            "Converged": flat_values > 0.0
        })

        count_ax = axes_2[row]
        sns.countplot(x="Algoritmo",
                      hue="Converged",
                      data=converged,
                      ax=count_ax,
                      palette="deep")

        # Only the bottom subplot keeps its x label.
        if row != 2:
            count_ax.set_xlabel(None)

        count_ax.set_title(f"Episodios = {num_train_episodes}")
        count_ax.set_ylabel("Frequencia")
        handles, _ = count_ax.get_legend_handles_labels()
        count_ax.legend(handles, ["Não", "Sim"], title="Convergiu?")

    plt.tight_layout()
    plt.show()
def decision_tree_driver(args, evaluate=False):
    """Classify the unlabelled images with a decision tree.

    Labelled vectors for ``args.decision_model`` are the training data
    (labelled by ``aspectOfHand``); the unlabelled vectors are the query.
    With ``evaluate`` the accuracy against the master metadata is printed,
    encoding dorsal as 0.0 and anything else as 1.0.

    Returns ``zip(u_images, prediction)``.
    """
    images, data_matrix = utils.get_all_vectors(args.decision_model)

    # Fetch unlabelled data (as provided in the settings)
    u_images, u_meta, unlabelled = helper.get_unlabelled_data(
        args.decision_model)

    # No dimensionality reduction is applied: the raw matrices are used.
    l_matrix = data_matrix[:len(images)]
    u_matrix = unlabelled[:len(u_images)]

    dm = helper.build_labelled_matrix(l_matrix, images, 'aspectOfHand')

    # prepare test data
    query = helper.prepare_matrix_for_evaluation(u_matrix)

    prediction = decision_tree(dm, query, args.decision_max_depth,
                               args.decision_min_size)

    dorsal_symbol = 0.0
    palmar_symbol = 1.0
    if evaluate:
        # Mapping between image file name and the metadata
        master_meta = {}
        for rec in utils.get_metadata(master_db=True):
            master_meta[rec['imageName']] = rec

        truth = []
        for image in u_images:
            aspect = master_meta[Path(image).name]['aspectOfHand']
            if aspect.split(' ')[0] == 'dorsal':
                truth.append(dorsal_symbol)
            else:
                truth.append(palmar_symbol)
        print(helper.get_accuracy(truth, prediction))

    return zip(u_images, prediction)
def train_mnist(project_id, epoch, train_per_epoch, interval):
    """Configure and start training on the MNIST dataset for a project.

    Checks for a GPU, loads the project's metadata, then calls ``train``
    with freshly built generator/discriminator models and Adam optimizers
    at a learning rate of 1e-4.
    """
    check_gpu(logger)
    project_metadata = get_metadata(project_id)

    dataset = load_mnist_dataset(project_id=project_id,
                                 buffer_size=60000,
                                 batch_size=256)
    generator = build_generator_model()
    discriminator = build_discriminator_model()
    generator_opt = keras.optimizers.Adam(1e-4)
    discriminator_opt = keras.optimizers.Adam(1e-4)
    input_generator = MnistInputGenerator(feat_dim=100)

    train(
        dataset=dataset,
        gen=generator,
        dis=discriminator,
        gen_opt=generator_opt,
        dis_opt=discriminator_opt,
        logger=logger,
        epochs=epoch,
        start_epoch=0,
        interval=interval,
        train_per_epoch=train_per_epoch,
        sample_size=4,
        batch_size=32,
        visualize=visualize_mnist_sample,
        project_metadata=project_metadata,
        gen_input_generator=input_generator,
    )
def ppr_driver(args, evaluate=False):
    """Classify the unlabelled images with the PPR classifier.

    Data comes from ``PreparePPRData.prepare_data``. With ``evaluate`` the
    accuracy against the master metadata is printed, encoding dorsal as
    0.0 and anything else as 1.0.

    Returns ``zip(u_images, predictions)``.
    """
    prepared = PreparePPRData.prepare_data(args.model,
                                           args.k_latent_semantics,
                                           args.frt, args.ignore_metadata)
    l_images, u_images, l_meta, u_meta, l_matrix, u_matrix = prepared

    # Build training data
    labelled = helper.build_matrix_with_labels(l_matrix, l_images, l_meta)

    # prepare test data
    query = helper.prepare_matrix_for_evaluation(u_matrix)

    # Evaluate
    predictions = ppr_classifier(labelled,
                                 query,
                                 frt=args.frt,
                                 k=args.k_latent_semantics,
                                 feature=args.model,
                                 edges=args.graph_edges,
                                 alpha=args.alpha,
                                 convergence=args.convergence)

    dorsal_symbol = 0.0
    palmar_symbol = 1.0
    if evaluate:
        # Mapping between image file name and the metadata
        master_meta = {}
        for rec in utils.get_metadata(master_db=True):
            master_meta[rec['imageName']] = rec

        truth = []
        for image in u_images:
            aspect = master_meta[Path(image).name]['aspectOfHand']
            if aspect.split(' ')[0] == 'dorsal':
                truth.append(dorsal_symbol)
            else:
                truth.append(palmar_symbol)
        print(helper.get_accuracy(truth, predictions))

    # Visualization pending
    return zip(u_images, predictions)
"HC03_VC13" ] factors = list(get_factors(sources, n_factors).values()) means = [] diffs = [] for i, factor in enumerate(factors): values = np.array(list(factor.values())) # values = (values - np.min(values)) / (np.max(values) - np.min(values)) means.append(np.mean(values)) diffs.append(np.mean(values, 1)) # diffs = (diffs - np.min(diffs)) / (np.max(diffs) - np.min(diffs)) fig = plt.figure() plt.plot(sources, 10 * lambdas_diff, label="λ") for n_factor, diff in zip(n_factors, diffs): plt.plot(sources, diff, label=get_metadata(2010, n_factor, False)) plt.legend(loc="upper left") plt.show() X = [np.array(list(xs)) for xs in zip(*diffs)] # X = np.array([np.array(diffs[0]) * np.array(diffs[1])]).reshape(-1, 1) y = lambdas_diff # plt.plot(sources, 3*y) # plt.plot(sources, X) # plt.show() reg = LinearRegression() reg.fit(X, y) print(reg.score(X, y)) print(reg.coef_) print(reg.intercept_)
def new():
    """Create a redirect page for a submitted URL and show its short link.

    On POST the target URL's metadata is fetched, rendered into a
    redirection template, written to ``r/<keyword>/<random>.html`` and
    logged to ``static/data.csv``. Any other method just renders the form.

    If the metadata lookup fails, the page is generated with blank
    title/type/image/description instead of failing the request.
    """
    if request.method != 'POST':
        return render_template("new.html")

    original_url = str(request.form.get('url'))
    pixel_script = str(request.form.get('pixel_script'))
    # NOTE(review): ``keyword`` comes straight from the form and is used
    # as a directory name below -- consider validating it.
    keyword = str(request.form.get('keyword'))

    if DEBUG == True:
        template_name = "redirection_debug.html"
    else:
        template_name = "redirection.html"

    # Previously a ``finally`` clause reset these to "" on every request,
    # discarding the metadata that had just been fetched.
    fields = {"title": "", "type": "", "image": "", "description": ""}
    try:
        metadata = utils.get_metadata(original_url)
        for key in fields:
            if key in metadata:
                fields[key] = getattr(metadata, key)
    except Exception:
        # Best effort: fall back to a page without metadata.
        fields = dict.fromkeys(fields, "")

    html_file = render_template(template_name,
                                url=original_url,
                                title=fields["title"],
                                type=fields["type"],
                                image=fields["image"],
                                description=fields["description"],
                                pixel_script=pixel_script)

    filename = shortuuid.ShortUUID().random(length=6) + ".html"
    directory = "r/" + keyword
    os.makedirs(directory, exist_ok=True)

    with open(directory + "/" + filename, mode="w",
              encoding="utf-8") as page:
        page.write(str(html_file))

    # write to csv (newline="" as the csv module documentation recommends)
    with open("static/" + "data.csv", "a", newline="") as fp:
        csv.writer(fp).writerow((str(original_url), str(filename)))

    return render_template("new.html",
                           redirect_url=SHORT_SITE + directory + "/" +
                           filename)
if prediction in fname: passed += 1 else: failed += 1 print('*' * 50) print(' > passed: ', passed) print(' > failed: ', failed) ar = passed / (passed + failed) print(' > accuracy ratio: ', '%.2f' % (ar * 100), '%') def get_files(dname, fpath): dpath = os.path.join(fpath, dname) return [f'{dpath}/{fname}' for fname in os.listdir(dpath)] if __name__ == '__main__': model_path = sys.argv[1] fpath = sys.argv[2] # Training dataset metadata _, class_names, class_to_idx = get_metadata(fpath) num_classes = len(class_names) idx_to_class = {value: key for key, value in class_to_idx.items()} flist = [get_files(cls, fpath) for cls in class_names] files = list(reduce(lambda x, y: x + y, flist)) result = predict_all(files, idx_to_class, model_path) analyze(result)
# y = [ class binary vars | subclass binary vars | pct] for item in metadata: y[c, class_map[item['class']]] = 1 y[c, subclass_map[item['subclass']] + len(total_classes) ] = 1 #TODO fix the filenames or write a script to handle vol frac # print item['volume_frac'] # y[c, len(total_classes) + len(total_subclasses)] = item['volume_frac'] c += 1 return y if __name__ == '__main__': num_grain_comp = 15 num_chord_comp = 3 num_folds = 5 metadata, class_map, subclass_map = get_metadata(paths.stats_files()) if os.path.isfile(paths.stats_pca_path()+'grain_grain_pca_scores.npy'): print 'PCA .npy found, loading.' pca_scores = np.load(paths.stats_pca_path()+'grain_grain_pca_scores.npy') else: x = load_data(paths.stats_files()) pca_scores = get_pca(x) np.save(paths.stats_pca_path()+'grain_grain_pca_scores.npy', pca_scores) chords = load_chords(paths.cord_length_path()) chords_pca = get_chords_pca(chords, use_avg=True) input_params = flatten_input_params(metadata, class_map, subclass_map) # PLOTTING FCNS # plot_chords(chords[0:5,0]) # class_labels, class_data = group_components_by_class(metadata, chords_pca)
logging.basicConfig(level=logging.DEBUG) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("input", nargs=1, help="The training hdf5 file") parser.add_argument("-pre", dest="pre", help="Pretrained word embeddings file in word2vec format (word <space> embedding, one per line)") options = parser.parse_args() # Load config parameters locals().update(config) logging.debug('loaded config') # DATA hdf5_file = options.input[0] word_to_ix, ix_to_word, morpho_to_ix, ix_to_morpho = get_metadata(hdf5_file) vocab_size = len(word_to_ix) morpho_vocab_size = len(morpho_to_ix) train_stream = get_stream(hdf5_file, 'train', batch_size) dev_stream = get_stream(hdf5_file, 'dev', batch_size) logging.debug('loaded data') print "Number of words:", vocab_size print "Number of morphemes:", morpho_vocab_size # Save the word and morpheme indices to disk D = { } D["word_to_ix"] = word_to_ix D["morpho_to_ix"] = morpho_to_ix cPickle.dump(D, open("dicts.pkl", "w")) logging.debug('wrote dicts') # Load the pretrained vectors if available if options.pre is not None:
import numpy as np
import pandas as pd
from utils import get_dataframe, NumpyEncoder, get_metadata
import json

# Input files, given relative to the current working directory.
train_dir = "assist09_train.csv"
test_dir = "assist09_test.csv"
skill_matrix_dir = "assist09_skill_matrix.txt"

df_train = get_dataframe(train_dir)
df_test = get_dataframe(test_dir)
# use this to extract the whole q-s graph
df_total = pd.concat([df_train, df_test], ignore_index=True)

skill_matrix = np.loadtxt(skill_matrix_dir)

single_skill_cnt, skill_cnt, max_idx = get_metadata(skill_matrix, df_total)

# Report the index ranges used for each kind of id.
print("single skill: 0 ~ {}, multi-skill: {} ~ {}, question: {} ~ {}, correctness: {} and {}"\
    .format(single_skill_cnt - 1, single_skill_cnt, skill_cnt - 1, skill_cnt, max_idx - 2, max_idx - 1, max_idx))

# graph -> list of dict
# node: {"type": "skill" or "question", "neighbor": [indices]}
qs_graph = []

# ?: is it feasible to get rid of multi-skills?
# init graph
# Node count = single skills + questions (skill_cnt .. max_idx - 2);
# multi-skill indices are left out of the count.
node_cnt = single_skill_cnt + max_idx - 2 - skill_cnt + 1
for i in range(node_cnt):
    # The first single_skill_cnt nodes are skills, the rest questions.
    if i >= 0 and i < single_skill_cnt:
        qs_graph.append({"type": "skill", "neighbor": []})
    else:
        qs_graph.append({"type": "question", "neighbor": []})
AdaBoostClassifier(), GaussianNB(), LinearDiscriminantAnalysis(), QuadraticDiscriminantAnalysis() ] for name, clf in zip(names, classifiers): scores = cross_val_score(clf, x, y, cv=num_folds) print name + ': ' + str(np.mean(scores)) if __name__ == '__main__': num_grain_comp = 5 num_chord_comp = 5 num_folds = 5 metadata, class_map, subclass_map = get_metadata(stats_files()) if os.path.isfile(stats_pca_path()+'grain_grain_pca_scores.npy'): print 'PCA .npy found, loading.' grain_pca = np.load(stats_pca_path()+'grain_grain_pca_scores.npy') else: x = load_data(stats_files()) grain_pca = get_pca(x) np.save(stats_pca_path()+'grain_grain_pca_scores.npy', grain_pca) chords = load_chords(cord_length_path()) chords_pca = get_chords_pca(chords, use_avg=True) # show the pca plots #plt.scatter(grain_pca[:, 0], grain_pca[:,1], alpha=0.85) #plt.show()
def main(settingsfname, verbose=False):
    """Cross-validate the configured model per subject and report accuracy.

    For every subject in the settings a train/test discrimination data set
    is assembled, the model pipeline is fitted on each CV split with sample
    weights, and the weighted accuracy is printed. A global accuracy over
    all subjects is stored under ``'all'``.

    Returns the ``{subject: accuracy}`` dict, which is also written out via
    ``utils.output_auc_scores``.
    """
    settings = utils.get_settings(settingsfname)
    subjects = settings['SUBJECTS']
    data = utils.get_data(settings, verbose=verbose)
    metadata = utils.get_metadata()

    # Keep only the requested features that are present in the parsed data.
    parsed = list(data.keys())
    settings['FEATURES'] = [
        name for name in settings['FEATURES'] if name in parsed
    ]
    utils.print_verbose("=====Feature HDF5s parsed=====", flag=verbose)

    # get model
    model_pipe = utils.build_model_pipe(settings)
    utils.print_verbose(
        "=== Model Used ===\n{0}\n==================".format(model_pipe),
        flag=verbose)

    # per-subject results
    per_subject = {}
    accuracy_scores = {}

    for subject in subjects:
        utils.print_verbose(
            "=====Training {0} Model=====".format(str(subject)),
            flag=verbose)

        # initialise the data assembler
        assembler = utils.DataAssembler(settings, data, metadata)
        X, y = assembler.test_train_discrimination(subject)

        # get the CV iterator
        cv = utils.sklearn.cross_validation.StratifiedShuffleSplit(
            y,
            random_state=settings['R_SEED'],
            n_iter=settings['CVITERCOUNT'])

        fold_predictions, fold_labels, fold_weights = [], [], []
        for train, test in cv:
            # fit the model to the training data, weighted per sample
            train_weights = utils.get_weights(y[train])
            model_pipe.fit(X[train], y[train],
                           clf__sample_weight=train_weights)
            fold_predictions.append(model_pipe.predict(X[test]))
            # test weights are kept for the weighted score computed below
            fold_weights.append(utils.get_weights(y[test]))
            # store true labels
            fold_labels.append(y[test])

        # stack up the results
        predictions = utils.np.hstack(fold_predictions)
        labels = utils.np.hstack(fold_labels)
        weights = utils.np.hstack(fold_weights)

        # calculate the total accuracy
        accuracy = utils.sklearn.metrics.accuracy_score(
            labels, predictions, sample_weight=weights)
        print("Accuracy score for {1}: {0:.3f}".format(accuracy, subject))

        accuracy_scores[subject] = accuracy
        per_subject[subject] = (predictions, labels, weights)

    # Concatenate predictions, labels and weights across all subjects.
    predictions, labels, weights = map(
        utils.np.hstack, zip(*list(per_subject.values())))

    # calculate global accuracy
    accuracy = utils.sklearn.metrics.accuracy_score(labels,
                                                    predictions,
                                                    sample_weight=weights)
    print(
        "predicted accuracy score over all subjects: {0:.2f}".format(accuracy))

    # output scores to file
    accuracy_scores['all'] = accuracy
    settings['DISCRIMINATE'] = 'accuracy_scores.csv'
    utils.output_auc_scores(accuracy_scores, settings)

    return accuracy_scores
def main(settingsfname, verbose=False):
    """Cross-validate the configured model per subject and report accuracy.

    For every subject in the settings a train/test discrimination data set
    is assembled, the model pipeline is fitted on each CV split with sample
    weights, and the weighted accuracy is printed. A global accuracy over
    all subjects is stored under ``'all'``.

    Returns the ``{subject: accuracy}`` dict, which is also written out via
    ``utils.output_auc_scores``.
    """
    settings = utils.get_settings(settingsfname)
    subjects = settings['SUBJECTS']
    data = utils.get_data(settings, verbose=verbose)
    metadata = utils.get_metadata()

    # Keep only the requested features that are present in the parsed data.
    parsed = list(data.keys())
    settings['FEATURES'] = [
        name for name in settings['FEATURES'] if name in parsed
    ]
    utils.print_verbose("=====Feature HDF5s parsed=====", flag=verbose)

    # get model
    model_pipe = utils.build_model_pipe(settings)
    utils.print_verbose(
        "=== Model Used ===\n{0}\n==================".format(model_pipe),
        flag=verbose)

    # per-subject results
    per_subject = {}
    accuracy_scores = {}

    for subject in subjects:
        utils.print_verbose(
            "=====Training {0} Model=====".format(str(subject)),
            flag=verbose)

        # initialise the data assembler
        assembler = utils.DataAssembler(settings, data, metadata)
        X, y = assembler.test_train_discrimination(subject)

        # get the CV iterator
        cv = utils.sklearn.cross_validation.StratifiedShuffleSplit(
            y,
            random_state=settings['R_SEED'],
            n_iter=settings['CVITERCOUNT'])

        fold_predictions, fold_labels, fold_weights = [], [], []
        for train, test in cv:
            # fit the model to the training data, weighted per sample
            train_weights = utils.get_weights(y[train])
            model_pipe.fit(X[train], y[train],
                           clf__sample_weight=train_weights)
            fold_predictions.append(model_pipe.predict(X[test]))
            # test weights are kept for the weighted score computed below
            fold_weights.append(utils.get_weights(y[test]))
            # store true labels
            fold_labels.append(y[test])

        # stack up the results
        predictions = utils.np.hstack(fold_predictions)
        labels = utils.np.hstack(fold_labels)
        weights = utils.np.hstack(fold_weights)

        # calculate the total accuracy
        accuracy = utils.sklearn.metrics.accuracy_score(
            labels, predictions, sample_weight=weights)
        print("Accuracy score for {1}: {0:.3f}".format(accuracy, subject))

        accuracy_scores[subject] = accuracy
        per_subject[subject] = (predictions, labels, weights)

    # Concatenate predictions, labels and weights across all subjects.
    predictions, labels, weights = map(
        utils.np.hstack, zip(*list(per_subject.values())))

    # calculate global accuracy
    accuracy = utils.sklearn.metrics.accuracy_score(labels,
                                                    predictions,
                                                    sample_weight=weights)
    print(
        "predicted accuracy score over all subjects: {0:.2f}".format(accuracy))

    # output scores to file
    accuracy_scores['all'] = accuracy
    settings['DISCRIMINATE'] = 'accuracy_scores.csv'
    utils.output_auc_scores(accuracy_scores, settings)

    return accuracy_scores
def convert_to_zscores(self, map_metadata_file=None, overwrite=None):
    """
    convert rectified images to z scores
    - unthresholded images could be either t or z images
    - if they are already z then just copy
    - use metadata supplied by teams to determine image type

    Args:
        map_metadata_file: path to the per-image metadata CSV; defaults to
            ``narps_neurovault_images_details.csv`` in the ``orig`` dir.
        overwrite: regenerate existing z-stat images; defaults to
            ``self.overwrite``.

    The ``'zstat'`` entry of each team's unthresholded images is updated
    in place with the output path for every hypothesis (1-9) whose
    rectified image exists.
    """
    log_to_file(self.dirs.logfile,
                '\n\n%s' % sys._getframe().f_code.co_name)
    # Must stay ahead of any other local assignment so that only the call
    # arguments are logged.
    func_args = inspect.getargvalues(inspect.currentframe()).locals
    log_to_file(self.dirs.logfile, stringify_dict(func_args))

    if overwrite is None:
        overwrite = self.overwrite

    if map_metadata_file is None:
        map_metadata_file = os.path.join(
            self.dirs.dirs['orig'], 'narps_neurovault_images_details.csv')
    unthresh_stat_type = get_map_metadata(map_metadata_file)
    metadata = get_metadata(self.metadata_file)

    n_participants = metadata[['n_participants', 'NV_collection_string']]
    n_participants.index = metadata.teamID

    unthresh_stat_type = unthresh_stat_type.merge(n_participants,
                                                  left_index=True,
                                                  right_index=True)

    # NOTE(review): elsewhere in this file complete_image_sets is indexed
    # by image type ('thresh' / 'unthresh'). If it is a dict keyed that
    # way, this loop iterates image types rather than team IDs -- confirm.
    for teamID in self.complete_image_sets:
        if teamID not in unthresh_stat_type.index:
            print('no map metadata for', teamID)
            continue
        # this is a bit of a kludge
        # since some contrasts include all subjects
        # but others only include some
        # we don't have the number of participants in each
        # group so we just use the entire number
        n = unthresh_stat_type.loc[teamID, 'n_participants']
        team_images = self.teams[teamID].images['unthresh']

        for hyp in range(1, 10):
            infile = team_images['rectified'][hyp]
            if not os.path.exists(infile):
                print('skipping', infile)
                continue

            zfile = os.path.join(self.dirs.dirs['zstat'],
                                 self.teams[teamID].datadir_label,
                                 'hypo%d_unthresh.nii.gz' % hyp)
            team_images['zstat'][hyp] = zfile
            if not overwrite and os.path.exists(zfile):
                continue

            # Compare case-insensitively for both types; previously only
            # the 't' comparison was lower-cased, so 'Z' was skipped.
            stat_type = unthresh_stat_type.loc[teamID,
                                               'unthresh_type'].lower()
            if stat_type not in ('t', 'z'):
                # if it's not T or Z then we skip it as it's not usable
                print('skipping %s - other data type' % teamID)
                continue

            # Create the output directory, including missing parents.
            os.makedirs(os.path.dirname(zfile), exist_ok=True)

            if stat_type == 't':
                print("converting %s (hyp %d) to z - %d participants" %
                      (teamID, hyp, n))
                TtoZ(infile, zfile, n - 1)
            elif not os.path.exists(zfile):
                # Already z: copy straight to the recorded output path.
                print('copying', teamID)
                shutil.copy(infile, zfile)
from blocks.graph import ComputationGraph, apply_dropout
from blocks.algorithms import StepClipping, GradientDescent, CompositeRule, RMSProp
from blocks.filter import VariableFilter
from blocks.extensions import FinishAfter, Timing, Printing, saveload
from blocks.extensions.training import SharedVariableModifier
from blocks.extensions.monitoring import DataStreamMonitoring, TrainingDataMonitoring
from blocks.monitoring import aggregation
from utils import get_metadata, get_stream, track_best, MainLoop
from model import nn_fprop
from config import config

# Load config parameters
# Assuming this statement runs at module level, locals() is the module
# namespace, so every key of `config` becomes a module-level name.
# NOTE(review): hdf5_file, batch_size, hidden_size, num_layers, model and
# dropout are read below without being assigned here - presumably they are
# the keys injected by this call; confirm against config.
locals().update(config)

# DATA
# get_metadata() is unpacked into the two vocabulary mappings and the
# vocabulary size; the train and dev streams are opened on the same file.
ix_to_char, char_to_ix, vocab_size = get_metadata(hdf5_file)
train_stream = get_stream(hdf5_file, 'train', batch_size)
dev_stream = get_stream(hdf5_file, 'dev', batch_size)

# MODEL
# NOTE(review): `tensor` must already be in scope at this point
# (presumably theano.tensor) - confirm the import exists.
# Input and target are uint8 matrix variables named 'features'/'targets'.
x = tensor.matrix('features', dtype='uint8')
y = tensor.matrix('targets', dtype='uint8')
# nn_fprop returns three values here: prediction, cost and cells.
y_hat, cost, cells = nn_fprop(x, y, vocab_size, hidden_size, num_layers, model)

# COST
# Computation graph built from the cost returned by nn_fprop.
cg = ComputationGraph(cost)
if dropout > 0:
    # Apply dropout only to the non-recurrent inputs (Zaremba et al. 2015)
    # Selects the graph variables whose theano name matches '.*apply_input.*'.
    inputs = VariableFilter(theano_name_regex=r'.*apply_input.*')(cg.variables)
def record(record_hash):
    """Render 'index.html' for the record identified by ``record_hash``.

    The record is looked up by its hash, its metadata is fetched, and both
    are combined by ``utils.format_response`` into the template context.
    """
    found = records.get_record_by_hash(record_hash)
    template_context = utils.format_response(found, utils.get_metadata(found))
    return flask.render_template('index.html', **template_context)
def main():
    """Run ten rounds of per-subject fitting and pickle the summary stats.

    Each round fits the scaler -> variance-threshold -> SVC pipeline for
    every subject in ``settings['SUBJECTS']``, summarises the collected
    train/test parts with ``mean_var_calc`` and appends each summary value
    to a per-subject, per-statistic list. After the last round the mean and
    variance of every list are printed and the lists are written to
    'summary_stats.pickle'.
    """
    metadata = utils.get_metadata()
    settings = utils.get_settings('probablygood.gavin.json')
    settings['R_SEED'] = None
    # settings['SUBJECTS'] = ['Patient_2']

    pipe = sklearn.pipeline.Pipeline([
        ('scl', sklearn.preprocessing.StandardScaler()),
        ('thr', sklearn.feature_selection.VarianceThreshold()),
        # ('sel', sklearn.feature_selection.SelectKBest()),
        ('cls', sklearn.svm.SVC(probability=True)),
    ])

    # latest fit results per subject; entries are replaced on every round
    output = {}
    data = utils.get_data(settings)
    da = utils.DataAssembler(settings, data, metadata)

    # one (initially empty) bucket per subject plus a 'global' bucket
    global_results = {
        name: {} for name in list(settings['SUBJECTS']) + ['global']
    }

    for iteration in range(10):
        print("iteration {0}".format(iteration))

        for subject in settings['SUBJECTS']:
            print(subject)
            X, y = da.build_training(subject)
            # cv = utils.Sequence_CV(da.training_segments, metadata)
            parts = fit_and_return_parts_and_results(da, metadata, pipe, X, y)
            train, test, train_results, test_results = parts
            output[subject] = {
                'train': train,
                'test': test,
                'train_results': train_results,
                'test_results': test_results,
            }
            # with open('raw_cv_data.pickle', 'wb') as fh:
            #     pickle.dump(output, fh)

        summary_stats = mean_var_calc(output)
        for subject in settings['SUBJECTS']:
            per_subject = global_results[subject]
            for stat_name in summary_stats[subject]:
                # start the list on first sight, then keep appending
                per_subject.setdefault(stat_name, []).append(
                    summary_stats[subject][stat_name])

    print(global_results)

    for subject in settings['SUBJECTS']:
        for stat_name in global_results[subject]:
            values = global_results[subject][stat_name]
            meanscore = np.mean(values)
            # NOTE(review): the message below labels this "sigma" but the
            # value is the variance (np.var) - confirm which is intended
            varscore = np.var(values)
            print("For {0} mean {1} was "
                  "{2} with sigma {3}".format(subject, stat_name,
                                              meanscore, varscore))

    with open('summary_stats.pickle', 'wb') as fh:
        pickle.dump(global_results, fh)
import sys
import torch
from PIL import Image
from utils import get_device
from utils import get_metadata
from utils import get_net
from utils import get_prediction_class
from utils import preprocess_image

# Script entry point. Reads two command-line arguments:
#   sys.argv[1] - passed to get_metadata() to obtain the class names
#   sys.argv[2] - path of the image opened for prediction
if __name__ == '__main__':
    device = get_device()
    # Training dataset metadata
    # Only the class names and the class -> index mapping are kept; the
    # first returned value is discarded.
    _, class_names, class_to_idx = get_metadata(sys.argv[1])
    num_classes = len(class_names)
    # Inverse lookup: index -> class
    idx_to_class = {value: key for key, value in class_to_idx.items()}
    # Data preparation
    image = Image.open(sys.argv[2])
    # Net initialization
    net = get_net(classes=num_classes)
    # NOTE(review): `os` is used on the next line but is not among the
    # imports listed above - confirm it is imported, otherwise this raises
    # NameError.
    # map_location=device loads the checkpoint tensors onto `device`.
    checkpoint_dict = torch.load(os.path.join('checkpoint', 'checkpoint.pth'), map_location=device)
    net.load_state_dict(checkpoint_dict['model_state_dict'])
    # Evaluation mode, then move the network to the selected device.
    net.eval()
    net.to(device)
    # Prediction
"alpha": 0.5, "lambda": 400, "subsample": 0.7, "colsample_bytree": 0.3, "objective": "binary:logistic", "scale_pos_weight": 0.9, "seed": 16, "gpu_id": 0, "tree_method": "gpu_hist", }

# One figure per value of `artificial`, each a 1x4 row of bar charts
# sharing the x axis. The figure height is 15 when artificial is False
# and 15 / 6 = 2.5 when it is True.
for artificial in [False, True]:
    fig, axes = plt.subplots(1, 4, figsize=(20, 15 / (1 + 5 * int(artificial))), sharex=True)
    # NOTE(review): feat_imp is assigned but not read anywhere in this loop.
    feat_imp = dict()
    X, y = utils.get_metadata(500, artificial=artificial)
    # Binarise the targets: True where the value is positive.
    y = y > 0
    # Fit one classifier per target column (columns 0-3 of y).
    for i in np.arange(4):
        # NOTE(review): `params` is presumably the dict literal that closes
        # above - confirm.
        model = xgboost.XGBClassifier(**params).fit(X, y.iloc[:, i])
        # Per-feature scores reported by the booster, sorted ascending so
        # the largest bar is drawn last in the horizontal bar chart.
        imp = pd.Series(model.get_booster().get_fscore()).sort_values(ascending=True)
        imp.plot(kind="barh", ax=axes[i])
    fig.tight_layout()
    plt.show()
def rating_review():
    """ Predict personalized review-usefulness

    A POST request carries a JSON body with 'uid' and 'iid'; any other
    request method gets the 'candidate.html' page.

    Outputs:
    -------
    : JSON string with predicted ratings, inference time, top reviews with
      ratings, other reviews with ratings, item metadata
    """
    if request.method != 'POST':
        return render_template('candidate.html')

    ids = request.json
    user_id = int(ids['uid'])
    item_id = int(ids['iid'])

    # Feed the inputs to the Tensorflow Serving model
    res, time_dif = tf_serving(
        [u_text[user_id].tolist()],
        [i_text[item_id].tolist()],
        np.array([[user_id]]),
        np.array([[item_id]]))

    # Flatten the predicted rating and the per-review usefulness weights,
    # then rank the reviews from most to least useful
    rating = np.array(res['final_rating/add_1:0']).reshape(-1)
    item_rev_weights = np.array(
        res['item_rev_weights/transpose_1:0']).reshape(-1)
    order = np.argsort(item_rev_weights)[::-1]

    # Pad the review texts with '' up to review_num_i before reordering
    rev_texts = item_rev_original[item_id][:review_num_i]
    missing = review_num_i - len(rev_texts)
    if missing > 0:
        rev_texts = rev_texts + [''] * missing
    ranked_texts = np.array(rev_texts)[order]

    # The first three non-empty reviews are "top", the rest are "other".
    # At most one review is appended per step, so checking the length of
    # the top list alone is enough.
    toprevs = []
    otherrevs = []
    for rev_text in ranked_texts:
        if not rev_text:
            continue
        bucket = toprevs if len(toprevs) < 3 else otherrevs
        bucket.append(rev_text)

    rev_rate_top = [int(float(df_revrate[text])) for text in toprevs]
    rev_rate_other = [int(float(df_revrate[text])) for text in otherrevs]

    # Prepare the metadata for the item
    des_meta, title_meta, price_meta, imurl_meta, categ_meta = get_metadata(
        df_meta, item_id, single_pred=True)

    payload = {
        'rating': rating.tolist(),
        'infertime': time_dif.total_seconds(),
        'toprevs': toprevs,
        'otherrevs': otherrevs,
        'rev_rate_top': rev_rate_top,
        'rev_rate_other': rev_rate_other,
        'des_meta': des_meta,
        'title_meta': title_meta,
        'price_meta': price_meta,
        'imurl_meta': imurl_meta,
        'categ_meta': categ_meta,
    }
    return json.dumps(payload)
def info_metadata():
    """Return the current metadata plus the playback volume as JSON.

    The value of ``lib.SpPlaybackGetVolume()`` is stored under the
    'volume' key of whatever ``get_metadata()`` returns.
    """
    payload = get_metadata()
    current_volume = lib.SpPlaybackGetVolume()
    payload['volume'] = current_volume
    return jsonify(payload)
from blocks.algorithms import StepClipping, GradientDescent, CompositeRule, RMSProp
from blocks.filter import VariableFilter
from blocks.extensions import FinishAfter, Timing, Printing
from blocks.extensions.training import SharedVariableModifier
from blocks.extensions.monitoring import DataStreamMonitoring, TrainingDataMonitoring
from blocks.monitoring import aggregation
from blocks.extensions import saveload
from utils import get_metadata, get_stream, track_best, MainLoop
from model import nn_fprop
from config import config

# Load config parameters
# Assuming this statement runs at module level, locals() is the module
# namespace, so every key of `config` becomes a module-level name.
# NOTE(review): hdf5_file, batch_size, hidden_size, num_layers, model and
# dropout are read below without being assigned here - presumably they are
# the keys injected by this call; confirm against config.
locals().update(config)

# DATA
# get_metadata() is unpacked into the two vocabulary mappings and the
# vocabulary size; the train and dev streams are opened on the same file.
ix_to_char, char_to_ix, vocab_size = get_metadata(hdf5_file)
train_stream = get_stream(hdf5_file, 'train', batch_size)
dev_stream = get_stream(hdf5_file, 'dev', batch_size)

# MODEL
# NOTE(review): `tensor` must already be in scope at this point
# (presumably theano.tensor) - confirm the import exists.
# Input and target are uint8 matrix variables named 'features'/'targets'.
x = tensor.matrix('features', dtype='uint8')
y = tensor.matrix('targets', dtype='uint8')
# nn_fprop returns two values here: prediction and cost.
y_hat, cost = nn_fprop(x, y, vocab_size, hidden_size, num_layers, model)

# COST
# NOTE(review): ComputationGraph must already be in scope at this point -
# it is not among the imports listed above; confirm.
cg = ComputationGraph(cost)
if dropout > 0:
    # Apply dropout only to the non-recurrent inputs (Zaremba et al. 2015)
    # Selects the graph variables whose theano name matches '.*apply_input.*'.
    inputs = VariableFilter(theano_name_regex=r'.*apply_input.*')(cg.variables)
def api_record(record_hash):
    """Return the metadata of the record identified by ``record_hash`` as JSON.

    The metadata mapping is expanded into keyword arguments for
    ``flask.jsonify``.
    """
    found = records.get_record_by_hash(record_hash)
    payload = utils.get_metadata(found)
    return flask.jsonify(**payload)