def mk_figures(narps, logfile, thresh=0.95):
    func_name = sys._getframe().f_code.co_name
    log_to_file(logfile, '%s' % func_name)

    fig, ax = plt.subplots(7, 1, figsize=(12, 24))
    cut_coords = [-24, -10, 4, 18, 32, 52, 64]

    for i, hyp in enumerate(hypnums):
        pmap = os.path.join(
            narps.dirs.dirs['consensus'],
            'hypo%d_1-fdr.nii.gz' % hyp)
        tmap = os.path.join(
            narps.dirs.dirs['consensus'],
            'hypo%d_t.nii.gz' % hyp)
        pimg = nibabel.load(pmap)
        timg = nibabel.load(tmap)
        pdata = pimg.get_fdata()
        tdata = timg.get_fdata()[:, :, :, 0]
        threshdata = (pdata > thresh) * tdata
        threshimg = nibabel.Nifti1Image(threshdata, affine=timg.affine)
        nilearn.plotting.plot_stat_map(
            threshimg,
            threshold=0.1,
            display_mode="z",
            colorbar=True,
            title='hyp %d:' % hyp + hypotheses[hyp],
            vmax=8,
            cmap='jet',
            cut_coords=cut_coords,
            axes=ax[i])

    plt.savefig(os.path.join(
        narps.dirs.dirs['figures'],
        'consensus_map.pdf'))
    plt.close(fig)
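# Note: every snippet in this file calls a log_to_file() helper that is defined
# elsewhere. The NARPS-style calls pass (logfile, message) plus optional
# flush/headspace keywords, while some of the other snippets call it with a
# single message argument, so the projects clearly use different helpers.
# The sketch below is only a hypothetical minimal implementation of the
# two-argument variant, inferred from the call sites; it is not the original code.
def log_to_file(logfile, message, flush=False, headspace=0):
    """Append a message to a text log, optionally starting a fresh file.

    flush=True truncates the file first; headspace prepends blank lines.
    (Hypothetical reimplementation for illustration only.)
    """
    mode = 'w' if flush else 'a'
    with open(logfile, mode) as f:
        f.write('\n' * headspace + str(message) + '\n')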
def check_image_values(self, overwrite=None):
    """
    get # of nonzero and NA voxels for each image
    """
    log_to_file(self.dirs.logfile,
                '\n\n%s' % sys._getframe().f_code.co_name)
    if overwrite is None:
        overwrite = self.overwrite
    image_metadata_file = os.path.join(
        self.dirs.dirs['metadata'], 'image_metadata_df.csv')
    if os.path.exists(image_metadata_file) and not overwrite:
        print('using cached image metadata')
        image_metadata_df = pandas.read_csv(image_metadata_file)
        return image_metadata_df

    # otherwise load from scratch
    image_metadata = []
    masker = nilearn.input_data.NiftiMasker(mask_img=self.dirs.MNI_mask)
    for teamID in self.complete_image_sets:
        for hyp in self.teams[teamID].images['thresh']['resampled']:
            threshfile = self.teams[teamID].images['thresh']['resampled'][hyp]
            threshdata = masker.fit_transform(threshfile)
            image_metadata.append([
                teamID, hyp,
                numpy.sum(numpy.isnan(threshdata)),
                numpy.sum(threshdata == 0.0)])

    image_metadata_df = pandas.DataFrame(
        image_metadata,
        columns=['teamID', 'hyp', 'n_na', 'n_zero'])
    image_metadata_df.to_csv(image_metadata_file)
    return image_metadata_df
def get_input_dirs(self, dirs, verbose=True, load_json=True):
    """
    get orig dirs
    - assumes that images.json is present for each valid dir
    """
    input_files = glob.glob(
        os.path.join(dirs.dirs['orig'], '*/hypo1_thresh.nii.gz'))
    input_dirs = [os.path.dirname(i) for i in input_files]

    log_to_file(self.dirs.logfile,
                'found %d input directories' % len(input_dirs))

    for i in input_dirs:
        collection_id = os.path.basename(i)
        NV_collection_id, teamID = collection_id.split('_')
        if teamID not in self.teams:
            self.teams[teamID] = NarpsTeam(
                teamID, NV_collection_id, dirs, verbose=self.verbose)
            if os.path.exists(os.path.join(i, 'images.json')):
                self.teams[teamID].jsonfile = os.path.join(i, 'images.json')
                with open(self.teams[teamID].jsonfile) as f:
                    self.teams[teamID].image_json = json.load(f)
def get_figi_history(self, figi: str, start: dt.datetime,
                     end: dt.datetime, interval: str):
    """
    Get candle history for a given FIGI identifier.

    :param figi: FIGI identifier of the instrument
    :param start: start of the requested period
    :param end: end of the requested period
    :param interval: candle interval (e.g. "day")
    :return: DataFrame of candles
    """
    hist = None
    count = 0
    # retry a few times, sleeping between failed requests
    while not hist and count < SLEEP_TRIES:
        count += 1
        try:
            hist = self.market.get_candles(
                figi=figi,
                _from=start.isoformat(),
                to=end.isoformat(),
                interval=interval,
            )
        except Exception as e:
            log_to_file(e)
            log_to_file(f"Sleep {SLEEP_TIME} seconds")
            time.sleep(SLEEP_TIME)

    if self.verbose:
        print("Received market response:", hist.payload.candles)

    candles = hist.payload.candles
    candles_dicts = [candle.to_dict() for candle in candles]
    df = pd.DataFrame(candles_dicts)
    return df
def order_view(request, pk):
    t = TranslationRequest.objects.get(pk=pk)
    check_stage(t.status, 'selected_quote')

    t.reference = "%s-%s" % (str(pk), str(uuid.uuid4()))
    t.order_name = "Aldryn Translator Order"

    # COPY OLD LANG PAGE TREE TO NEW ONE
    if t.copy_content:
        copy_pages(t.from_lang, t.to_lang, t.pages)

    data = prepare_data(t, t.from_lang, t.to_lang,
                        plugin_source_lang=t.to_lang)
    data.update(prepare_order_data(request, t))
    order = json.loads(get_order(t.provider, data))

    if log_to_file_enabled():
        log_to_file(data)

    t.sent_content = json.dumps(data, ensure_ascii=False).encode(
        'ascii', 'xmlcharrefreplace')
    t.status = 'requested'
    t.save()

    # TODO: save other stuff from response to model
    # (deadline, price, more?)

    if t.provider == 'supertext':
        return render_to_response(
            'aldryn_translator/confirmation.html', {'r': order},
            context_instance=RequestContext(request))
    else:
        raise NotImplementedError()
def run_all_analyses(narps, simulate_noise=False):
    logfile = os.path.join(
        narps.dirs.dirs['logs'],
        'ThresholdSimulation.log')
    log_to_file(logfile, 'Running thresholding simulation', flush=True)

    # get team results to add to table
    all_metadata = get_all_metadata(narps)
    mean_decision = all_metadata.groupby('varnum').Decision.mean()

    all_results = []
    for hyp in range(1, 10):
        results, mean_fdr_thresh, meta_results, roisize = get_activations(
            narps, hyp, logfile, simulate_noise=simulate_noise)
        mean_results = (results > 0).mean(0)
        r = [
            hyp, roisize, mean_decision.loc[hyp],
            mean_results[0], mean_results[1],
            meta_results[0], meta_results[1]]
        all_results.append(r)

    results_df = pandas.DataFrame(
        all_results,
        columns=[
            'Hypothesis',
            'N voxels in ROI',
            'proportion of teams reporting act.',
            'proportion of teams w/ act. (%s)' % results.columns[0],
            'proportion of teams w/ act. (%s)' % results.columns[1],
            'CBMA (n voxels in ROI)',
            'IBMA (n voxels in ROI)'])
    results_df.to_csv(
        os.path.join(narps.dirs.dirs['ThresholdSimulation'],
                     'simulation_results.csv'),
        index=False)
    return results_df
def estimate_smoothness(self, overwrite=None, imgtype='zstat'):
    """
    estimate smoothness of Z maps using FSL's smoothness estimation
    """
    log_to_file(
        self.dirs.logfile,
        sys._getframe().f_code.co_name,
        headspace=2)
    func_args = inspect.getargvalues(inspect.currentframe()).locals
    log_to_file(self.dirs.logfile, stringify_dict(func_args))

    if overwrite is None:
        overwrite = self.overwrite
    output_file = os.path.join(
        self.dirs.dirs['metadata'], 'smoothness_est.csv')
    if os.path.exists(output_file) and not overwrite:
        if self.verbose:
            print('using existing smoothness file')
        smoothness_df = pandas.read_csv(output_file)
        return smoothness_df

    # use nipype's interface to the FSL smoothest command
    est = SmoothEstimate()
    smoothness = []
    for teamID in self.complete_image_sets['unthresh']:
        for hyp in range(1, 10):
            if hyp not in self.teams[teamID].images['unthresh'][imgtype]:
                # fill missing data with nan
                print('no zstat present for', teamID, hyp)
                smoothness.append(
                    [teamID, hyp, numpy.nan, numpy.nan, numpy.nan])
                continue
            infile = self.teams[teamID].images['unthresh'][imgtype][hyp]
            if not os.path.exists(infile):
                print('no image present:', infile)
                continue
            if self.verbose:
                print('estimating smoothness for hyp', hyp)

            est.inputs.zstat_file = infile
            est.inputs.mask_file = self.dirs.MNI_mask
            est.terminal_output = 'file_split'
            smoothest_output = est.run()
            smoothness.append([
                teamID, hyp,
                smoothest_output.outputs.dlh,
                smoothest_output.outputs.volume,
                smoothest_output.outputs.resels])
            self.teams[teamID].logs['smoothest'] = (
                smoothest_output.runtime.stdout,
                smoothest_output.runtime.stderr)

    smoothness_df = pandas.DataFrame(
        smoothness,
        columns=['teamID', 'hyp', 'dlh', 'volume', 'resels'])
    smoothness_df.to_csv(output_file)
    return smoothness_df
def compute_image_stats(self, datatype='zstat', overwrite=None):
    """
    compute std and range on statistical images
    """
    log_to_file(
        self.dirs.logfile,
        sys._getframe().f_code.co_name,
        headspace=2)
    func_args = inspect.getargvalues(inspect.currentframe()).locals
    log_to_file(self.dirs.logfile, stringify_dict(func_args))

    if overwrite is None:
        overwrite = self.overwrite

    # set up directories
    unthresh_concat_dir = self.dirs.get_output_dir(
        'unthresh_concat_%s' % datatype)
    unthresh_range_dir = self.dirs.get_output_dir(
        'unthresh_range_%s' % datatype)
    unthresh_std_dir = self.dirs.get_output_dir(
        'unthresh_std_%s' % datatype)

    for hyp in range(1, 10):
        unthresh_file = os.path.join(
            unthresh_concat_dir, 'hypo%d.nii.gz' % hyp)
        range_outfile = os.path.join(
            unthresh_range_dir, 'hypo%d.nii.gz' % hyp)
        std_outfile = os.path.join(
            unthresh_std_dir, 'hypo%d.nii.gz' % hyp)

        if not os.path.exists(range_outfile) \
                or not os.path.exists(std_outfile) \
                or overwrite:
            unthresh_img = nibabel.load(unthresh_file)
            unthresh_data = unthresh_img.get_data()
            concat_data = numpy.nan_to_num(unthresh_data)

            # compute range
            datarange = numpy.max(concat_data, axis=3) \
                - numpy.min(concat_data, axis=3)
            range_img = nibabel.Nifti1Image(
                datarange, affine=unthresh_img.affine)
            range_img.to_filename(range_outfile)

            # compute standard deviation
            datastd = numpy.std(concat_data, axis=3)
            std_img = nibabel.Nifti1Image(
                datastd, affine=unthresh_img.affine)
            std_img.to_filename(std_outfile)
def __init__(self, basedir, data_url=None, force_download=False):
    # set up a dictionary to contain all of the directories
    self.dirs = {}

    # check to make sure home of basedir exists
    assert os.path.exists(os.path.dirname(basedir))
    self.dirs['base'] = basedir
    if not os.path.exists(basedir):
        os.mkdir(basedir)

    self.force_download = force_download
    if data_url is None:
        self.data_url = DATA_URL

    dirs_to_add = ['output', 'metadata', 'cached',
                   'figures', 'logs', 'orig']
    for d in dirs_to_add:
        self.dirs[d] = os.path.join(self.dirs['base'], d)
    self.dirs['templates'] = os.path.join(
        os.environ['FSLDIR'], 'data/standard')

    # autogenerate all of the directories
    # except for the orig dir
    for d in dirs_to_add:
        if d != 'orig' and not os.path.exists(self.dirs[d]):
            os.mkdir(self.dirs[d])

    self.logfile = os.path.join(self.dirs['logs'], 'narps.txt')
    log_to_file(self.logfile, 'Running Narps main class', flush=True)

    output_dirs = ['resampled', 'rectified', 'zstat', 'thresh_mask_orig']
    for o in output_dirs:
        self.get_output_dir(o)

    # if raw data don't exist, download them
    if self.force_download and os.path.exists(self.dirs['orig']):
        shutil.rmtree(self.dirs['orig'])
    if not os.path.exists(self.dirs['orig']):
        self.get_orig_data()
    assert os.path.exists(self.dirs['orig'])

    # make sure the necessary templates are present
    # these should be downloaded with the raw data
    self.MNI_mask = os.path.join(
        self.dirs['templates'], 'MNI152_T1_2mm_brain_mask.nii.gz')
    assert os.path.exists(self.MNI_mask)

    self.MNI_template = os.path.join(
        self.dirs['templates'], 'MNI152_T1_2mm.nii.gz')
    assert os.path.exists(self.MNI_template)

    self.full_mask_img = os.path.join(
        self.dirs['templates'], 'MNI152_all_voxels.nii.gz')
def get_binarized_thresh_masks(self):
    """
    create binarized thresholded maps for each team
    """
    log_to_file(self.dirs.logfile,
                '\n\n%s' % sys._getframe().f_code.co_name)
    for teamID in self.complete_image_sets:
        self.teams[teamID].create_binarized_thresh_masks()
def compute_image_stats(self, datatype='zstat', overwrite=None):
    """
    compute std and range on statistical images
    """
    log_to_file(self.dirs.logfile,
                '\n\n%s' % sys._getframe().f_code.co_name)
    func_args = inspect.getargvalues(inspect.currentframe()).locals
    log_to_file(self.dirs.logfile, stringify_dict(func_args))

    if overwrite is None:
        overwrite = self.overwrite

    for hyp in range(1, 10):
        unthresh_file = os.path.join(
            self.dirs.dirs['output'],
            'unthresh_concat_%s/hypo%d.nii.gz' % (datatype, hyp))

        range_outfile = os.path.join(
            self.dirs.dirs['output'],
            'unthresh_range_%s/hypo%d.nii.gz' % (datatype, hyp))
        if not os.path.exists(os.path.join(
                self.dirs.dirs['output'],
                'unthresh_range_%s' % datatype)):
            os.mkdir(os.path.join(
                self.dirs.dirs['output'],
                'unthresh_range_%s' % datatype))

        std_outfile = os.path.join(
            self.dirs.dirs['output'],
            'unthresh_std_%s/hypo%d.nii.gz' % (datatype, hyp))
        if not os.path.exists(os.path.join(
                self.dirs.dirs['output'],
                'unthresh_std_%s' % datatype)):
            os.mkdir(os.path.join(
                self.dirs.dirs['output'],
                'unthresh_std_%s' % datatype))

        if not os.path.exists(range_outfile) \
                or not os.path.exists(std_outfile) \
                or overwrite:
            unthresh_img = nibabel.load(unthresh_file)
            unthresh_data = unthresh_img.get_data()
            concat_data = numpy.nan_to_num(unthresh_data)

            # compute range across teams
            datarange = numpy.max(concat_data, axis=3) \
                - numpy.min(concat_data, axis=3)
            range_img = nibabel.Nifti1Image(
                datarange, affine=unthresh_img.affine)
            range_img.to_filename(range_outfile)

            # compute standard deviation across teams
            datastd = numpy.std(concat_data, axis=3)
            std_img = nibabel.Nifti1Image(
                datastd, affine=unthresh_img.affine)
            std_img.to_filename(std_outfile)
def verify_service_content(url, request_body, content):
    headers = {'content-type': 'text/xml'}
    try:
        start = datetime.datetime.now()
        r = requests.post(url, data=request_body, headers=headers)
        end = datetime.datetime.now()
    except Exception as e:
        utils.log_to_file(e)
        return 0
def __init__(self, basedir, data_url=None, force_download=False):
    # set up directories and template files
    self.dirs = {}

    # check to make sure home of basedir exists
    assert os.path.exists(os.path.dirname(basedir))
    self.dirs['base'] = basedir
    if not os.path.exists(basedir):
        os.mkdir(basedir)

    self.force_download = force_download
    if data_url is None:
        self.data_url = DATA_URL

    self.dirs['orig'] = os.path.join(self.dirs['base'], 'orig')
    self.dirs['templates'] = os.path.join(self.dirs['base'], 'templates')
    self.dirs['output'] = os.path.join(self.dirs['base'], 'maps')
    self.dirs['metadata'] = os.path.join(self.dirs['base'], 'metadata')
    self.dirs['cached'] = os.path.join(self.dirs['base'], 'cached')
    self.dirs['figures'] = os.path.join(self.dirs['base'], 'figures')
    self.dirs['logs'] = os.path.join(self.dirs['base'], 'logs')

    dirs_to_make = ['output', 'metadata', 'cached', 'figures', 'logs']
    for d in dirs_to_make:
        if not os.path.exists(self.dirs[d]):
            os.mkdir(self.dirs[d])

    self.logfile = os.path.join(self.dirs['logs'], 'narps.txt')
    log_to_file(self.logfile, 'Running Narps main class', flush=True)

    output_dirs = ['resampled', 'rectified', 'zstat',
                   'thresh_mask_orig', 'concat_thresh']
    for o in output_dirs:
        self.dirs[o] = os.path.join(self.dirs['output'], o)
        if not os.path.exists(self.dirs[o]):
            os.mkdir(self.dirs[o])

    # if raw data don't exist, download them
    if not os.path.exists(self.dirs['orig']) or self.force_download:
        self.get_orig_data()
    assert os.path.exists(self.dirs['orig'])

    self.MNI_mask = os.path.join(
        self.dirs['templates'], 'MNI152_T1_2mm_brain_mask.nii.gz')
    assert os.path.exists(self.MNI_mask)

    self.MNI_template = os.path.join(
        self.dirs['templates'], 'MNI152_T1_2mm.nii.gz')
    assert os.path.exists(self.MNI_template)

    self.full_mask_img = os.path.join(
        self.dirs['templates'], 'MNI152_all_voxels.nii.gz')

    # templates should also be downloaded with orig data
    assert os.path.exists(self.dirs['templates'])
def get_etfs_daily_history(
    self,
    end=dt.datetime.now(MOSCOW_TIMEZONE) - dt.timedelta(days=1),
    start=dt.datetime.now(MOSCOW_TIMEZONE) - dt.timedelta(weeks=52, days=1),
):
    """
    Get daily market history (1 point per day) with exactly the requested
    interval.

    Note: Due to API restrictions, an interval longer than a year must be
    divided into years when fetched.
    """
    interval = "day"
    interval_dt = dt.timedelta(days=1)
    print(
        f"Requesting ETF history from {start} till {end} with an interval={interval}"
    )
    length = end.astimezone(MOSCOW_TIMEZONE) - start.astimezone(MOSCOW_TIMEZONE)

    # The server bugs if the time period is smaller than 1 interval
    if length < interval_dt:
        start = end - interval_dt
        length = interval_dt

    # split the requested range into periods of at most one year
    one_period = dt.timedelta(weeks=52)
    n_periods = int(math.ceil(length / one_period))
    periods = [
        [end - one_period * (i + 1), end - one_period * i] for i in range(n_periods)
    ]
    periods[-1][0] = start
    periods = periods[::-1]

    dfs = []
    out, tickers = pd.DataFrame(), []
    for period in tqdm(periods, desc=f"Getting forecast with interval={interval}"):
        if self.verbose:
            print(f"Requesting history from {period[0]} till {period[1]}.")
        df, tickers = self.get_etfs_history(
            start=period[0], end=period[1], freq=interval
        )
        if df.empty:
            print(
                f"Server returned an empty reply for the following period: {period}!"
            )
            continue
        dfs.append(df)

    if dfs:
        out = pd.concat(dfs, axis=0, ignore_index=True)
        # drop rows with NaN time stamps
        l1 = len(out)
        out.drop(out[pd.isna(out.time)].index, inplace=True)
        if len(out) < l1:
            log_to_file(f"{l1 - len(out)} NaN time stamps dropped.")

    return out, tickers
def get_resampled_images(self, overwrite=None):
    """
    resample all images into FSL MNI space
    """
    log_to_file(self.dirs.logfile,
                '\n\n%s' % sys._getframe().f_code.co_name)
    if overwrite is None:
        overwrite = self.overwrite
    for teamID in self.complete_image_sets:
        self.teams[teamID].get_resampled_images()
def __init__(self, basedir, metadata_file=None, verbose=False,
             overwrite=False, dataurl=None, testing=False):
    self.basedir = basedir
    self.dirs = NarpsDirs(basedir, dataurl=dataurl, testing=testing)
    self.verbose = verbose
    self.teams = {}
    self.overwrite = overwrite
    self.started_at = datetime.datetime.now()
    self.testing = testing

    # create the full mask image if it doesn't already exist
    if not os.path.exists(self.dirs.full_mask_img):
        print('making full image mask')
        self.mk_full_mask_img(self.dirs)
    assert os.path.exists(self.dirs.full_mask_img)

    # get input dirs for orig data
    self.image_jsons = None
    self.input_dirs = self.get_input_dirs(self.dirs)

    # check images for each team
    self.complete_image_sets = {}
    self.get_orig_images(self.dirs)
    for imgtype in ['thresh', 'unthresh']:
        log_to_file(
            self.dirs.logfile,
            'found %d teams with complete original %s datasets' %
            (len(self.complete_image_sets[imgtype]), imgtype))

    # set up metadata
    if metadata_file is None:
        self.metadata_file = os.path.join(
            self.dirs.dirs['orig'],
            'analysis_pipelines_for_analysis.xlsx')
    else:
        self.metadata_file = metadata_file

    self.metadata = get_metadata(self.metadata_file)

    self.hypothesis_metadata = pandas.DataFrame(
        columns=['teamID', 'hyp', 'n_na', 'n_zero'])

    self.all_maps = {
        'thresh': {'resampled': None},
        'unthresh': {'resampled': None}}
    self.rectified_list = []
def get_resampled_images(self, overwrite=None):
    """
    resample all images into FSL MNI space
    """
    log_to_file(self.dirs.logfile,
                sys._getframe().f_code.co_name,
                headspace=2)
    if overwrite is None:
        overwrite = self.overwrite
    for imgtype in ['thresh', 'unthresh']:
        for teamID in self.complete_image_sets[imgtype]:
            self.teams[teamID].get_resampled_images(imgtype=imgtype)
def create_concat_images(self, datatype='resampled',
                         imgtypes=None, overwrite=None):
    """
    create images concatenated across teams
    ordered by self.complete_image_sets
    """
    log_to_file(self.dirs.logfile,
                '\n\n%s' % sys._getframe().f_code.co_name)
    func_args = inspect.getargvalues(inspect.currentframe()).locals
    log_to_file(self.dirs.logfile, stringify_dict(func_args))

    if imgtypes is None:
        imgtypes = ['thresh', 'unthresh']
    if overwrite is None:
        overwrite = self.overwrite

    for imgtype in imgtypes:
        self.dirs.dirs['concat_%s' % imgtype] = os.path.join(
            self.dirs.dirs['output'],
            '%s_concat_%s' % (imgtype, datatype))
        for hyp in range(1, 10):
            outfile = os.path.join(
                self.dirs.dirs['concat_%s' % imgtype],
                'hypo%d.nii.gz' % hyp)
            if not os.path.exists(os.path.dirname(outfile)):
                os.mkdir(os.path.dirname(outfile))
            if not os.path.exists(outfile) or overwrite:
                if self.verbose:
                    print('%s - hypo %d: creating concat file' %
                          (imgtype, hyp))
                concat_teams = [
                    teamID for teamID in self.complete_image_sets
                    if os.path.exists(
                        self.teams[teamID].images[imgtype][datatype][hyp])]
                self.all_maps[imgtype][datatype] = [
                    self.teams[teamID].images[imgtype][datatype][hyp]
                    for teamID in concat_teams]

                # use nilearn NiftiMasker to load data
                # and save to a new file
                masker = nilearn.input_data.NiftiMasker(
                    mask_img=self.dirs.MNI_mask)
                concat_data = masker.fit_transform(
                    self.all_maps[imgtype][datatype])
                concat_img = masker.inverse_transform(concat_data)
                concat_img.to_filename(outfile)
            else:
                if self.verbose:
                    print('%s - hypo %d: using existing file' %
                          (imgtype, hyp))
    return self.all_maps
def get_thresh_similarity(narps, dataset='resampled'):
    """
    For each pair of thresholded images, compute the similarity
    of the thresholded/binarized maps using the Jaccard coefficient.
    Computation with zeros per:
    https://stackoverflow.com/questions/37003272/how-to-compute-jaccard-similarity-from-a-pandas-dataframe # noqa
    Also compute the Jaccard coefficient on only nonzero pairs
    (a la scipy).
    """
    func_args = inspect.getargvalues(inspect.currentframe()).locals
    func_name = sys._getframe().f_code.co_name
    logfile = os.path.join(
        narps.dirs.dirs['logs'],
        '%s-%s.txt' % (sys.argv[0].split('.')[0], func_name))
    log_to_file(logfile, '%s' % func_name, flush=True)
    log_to_file(logfile, stringify_dict(func_args))

    output_dir = os.path.join(narps.dirs.dirs['output'], 'jaccard_thresh')
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    for hyp in hypnums:
        print('creating Jaccard map for hypothesis', hyp)
        maskdata, labels = get_masked_data(
            hyp, narps.dirs.MNI_mask, narps.dirs.dirs['output'],
            imgtype='thresh', dataset=dataset)

        # similarity including joint zeros (1 - Hamming distance)
        jacsim = 1 - pairwise_distances(maskdata, metric="hamming")
        # Jaccard similarity over voxels that are nonzero in either map
        jacsim_nonzero = 1 - squareform(pdist(maskdata, 'jaccard'))

        df = pandas.DataFrame(jacsim, index=labels, columns=labels)
        df.to_csv(os.path.join(
            output_dir, 'jacsim_thresh_hyp%d.csv' % hyp))
        df_nonzero = pandas.DataFrame(
            jacsim_nonzero, index=labels, columns=labels)
        df_nonzero.to_csv(os.path.join(
            output_dir, 'jacsim_nonzero_thresh_hyp%d.csv' % hyp))

        seaborn.clustermap(df, cmap='jet', figsize=(16, 16), method='ward')
        plt.title(hypotheses[hyp])
        plt.savefig(os.path.join(
            narps.dirs.dirs['figures'],
            'hyp%d_jaccard_map_thresh.pdf' % hyp))
        plt.close()

        seaborn.clustermap(
            df_nonzero, cmap='jet', figsize=(16, 16), method='ward')
        plt.title(hypotheses[hyp])
        plt.savefig(os.path.join(
            narps.dirs.dirs['figures'],
            'hyp%d_jaccard_nonzero_map_thresh.pdf' % hyp))
        plt.close()
def plot_distance_from_mean(narps):
    func_name = sys._getframe().f_code.co_name
    logfile = os.path.join(
        narps.dirs.dirs['logs'],
        'AnalyzeMaps-%s.txt' % func_name)
    log_to_file(logfile, '%s' % func_name, flush=True)

    median_corr_df = pandas.read_csv(
        os.path.join(narps.dirs.dirs['metadata'],
                     'median_pattern_corr.csv'))

    # plot distance from mean across teams
    plt.bar(median_corr_df.index, median_corr_df.median_corr)
    plt.savefig(os.path.join(
        narps.dirs.dirs['figures'],
        'median_corr_sorted.pdf'), bbox_inches='tight')
    plt.close()

    # this summary is limited to the teams with particularly
    # low median correlations (< 0.2)
    median_corr_low = median_corr_df.query('median_corr < 0.2')
    log_to_file(
        logfile,
        'found %d teams with r<0.2 with mean pattern' %
        median_corr_low.shape[0])
    log_to_file(logfile, median_corr_low.iloc[:, 0].values)

    median_corr_high = median_corr_df.query('median_corr > 0.7')
    log_to_file(
        logfile,
        'found %d teams with r>0.7 with mean pattern' %
        median_corr_high.shape[0])
def evaluate(weights_file, image_model_name='vgg16', embedding_type='glove',
             embedding_dim=300, batch_size=200):
    # create data loader
    data_path = os.path.join(os.path.dirname(__file__), 'data')
    option = {
        'data_root': data_path,  # MODIFY PATH ACCORDINGLY
        'fine_size': 224,
        'word_embedding_length': 1024,
        'randomize': False
    }
    data_loader = DataLoaderDisk(**option)
    word_index = data_loader.tokenizer.word_index

    # load word embeddings
    embedding_path = ''
    if embedding_type == 'glove':
        embedding_path = os.path.join(
            data_path, 'glove.6B', 'glove.6B.{0}d.txt'.format(embedding_dim))
    embedding_matrix = get_embedding_matrix(
        word_index, embedding_type, embedding_path)

    # build the model and load trained weights
    seq_length = 25
    model_val = vqa_model(image_model_name, embedding_matrix, seq_length,
                          dropout_rate=0.5, num_classes=3131)
    model_val.load_weights(weights_file)

    # evaluate on the validation set
    epochs = 1
    iters = int(data_loader.val_num * epochs / batch_size)
    val_accuracy = 0
    for iteration in tqdm(range(iters)):
        img_batch_val, que_batch_val, y_batch_val = data_loader.next_batch(
            batch_size, mode='val')
        val_score = model_val.test_on_batch(
            [img_batch_val, que_batch_val], y_batch_val)
        val_accuracy += float(val_score[1])
        msg = 'iter = {0}, val acc: {1:03f}'.format(
            iteration, float(val_score[1]))
        log_to_file(msg)

    msg = 'Overall Accuracy on Validation-Set: {0}'.format(
        val_accuracy / iters)
    log_to_file(msg)
def _load_data(self):
    try:
        self._data = pd.read_csv(self.data_file)

        # Drop nans in time stamps
        l1 = len(self._data)
        self._data.drop(self._data[pd.isna(self._data.time)].index,
                        inplace=True)
        if len(self._data) < l1:
            log_to_file(f"{l1 - len(self._data)} NaN time stamps dropped.")

        self._data.time = pd.to_datetime(self._data.time)
        with open(self.tickers_file, "r") as f:
            self._tickers = f.read().split()
        print("Local history data loaded successfully")
    except FileNotFoundError:
        print("No saved ETF history found locally.")
def get_thresh_similarity(narps, dataset='resampled'):
    """
    For each pair of thresholded images, compute the similarity
    of the thresholded/binarized maps using the Jaccard coefficient.
    Computation with zeros per:
    https://stackoverflow.com/questions/37003272/how-to-compute-jaccard-similarity-from-a-pandas-dataframe # noqa
    Also compute the Jaccard coefficient on only nonzero pairs
    (a la scipy).
    """
    func_args = inspect.getargvalues(inspect.currentframe()).locals
    func_name = sys._getframe().f_code.co_name
    logfile = os.path.join(
        narps.dirs.dirs['logs'],
        'AnalyzeMaps-%s.txt' % func_name)
    log_to_file(logfile, '%s' % func_name, flush=True)
    log_to_file(logfile, stringify_dict(func_args))

    for hyp in hypnums:
        print('analyzing thresh similarity for hypothesis', hyp)
        maskdata, labels = get_concat_data(
            hyp, narps.dirs.MNI_mask, narps.dirs.dirs['output'],
            imgtype='thresh', dataset=dataset)

        # percent agreement across all voxels (including joint zeros)
        pctagree = matrix_pct_agreement(maskdata)
        median_pctagree = numpy.median(
            pctagree[numpy.triu_indices_from(pctagree, 1)])
        log_to_file(
            logfile,
            'hyp %d: median pctagree similarity: %f' %
            (hyp, median_pctagree))

        df_pctagree = pandas.DataFrame(
            pctagree, index=labels, columns=labels)
        df_pctagree.to_csv(os.path.join(
            narps.dirs.dirs['metadata'], 'pctagree_hyp%d.csv' % hyp))

        seaborn.clustermap(
            df_pctagree, cmap='jet', figsize=(16, 16), method='ward')
        plt.title(hypotheses_full[hyp])
        plt.savefig(os.path.join(
            narps.dirs.dirs['figures'],
            'hyp%d_pctagree_map_thresh.pdf' % hyp), bbox_inches='tight')
        plt.savefig(os.path.join(
            narps.dirs.dirs['figures'],
            'hyp%d_pctagree_map_thresh.png' % hyp), bbox_inches='tight')
        plt.close()

        # get Jaccard similarity for nonzero voxels
        jacsim_nonzero = 1 - squareform(pdist(maskdata, 'jaccard'))
        median_jacsim_nonzero = numpy.median(
            jacsim_nonzero[numpy.triu_indices_from(jacsim_nonzero, 1)])
        log_to_file(
            logfile,
            'hyp %d: median jaccard similarity (nonzero): %f' %
            (hyp, median_jacsim_nonzero))
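# A small self-contained illustration (not part of the NARPS code) of the two
# similarity measures used in get_thresh_similarity() above, computed on two
# toy binarized maps. matrix_pct_agreement() is assumed to behave like the
# Hamming-based agreement shown here.
import numpy as np
from sklearn.metrics import pairwise_distances
from scipy.spatial.distance import pdist, squareform

toy_maps = np.array([[1, 1, 0, 0, 0],
                     [1, 0, 1, 0, 0]])

# proportion of voxels (including joint zeros) on which the two maps agree
pct_agree = 1 - pairwise_distances(toy_maps, metric="hamming")
# Jaccard similarity over voxels that are nonzero in at least one map
jac_nonzero = 1 - squareform(pdist(toy_maps, "jaccard"))

print(pct_agree[0, 1])    # 0.6: 3 of 5 voxels agree
print(jac_nonzero[0, 1])  # 0.333...: 1 overlapping voxel / 3 voxels in the union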
def setup_simulated_data(narps, verbose=False, overwrite=False):
    """create directories for simulated data"""
    # consensus analysis must exist
    assert os.path.exists(narps.dirs.dirs['consensus'])

    basedir = narps.basedir + '_simulated'
    if verbose:
        print("writing files to new directory:", basedir)
    if not os.path.exists(os.path.join(basedir, 'logs')):
        os.makedirs(os.path.join(basedir, 'logs'))
    log_to_file(os.path.join(basedir, 'logs/simulated_data.log'),
                'Creating simulated dataset', flush=True)

    # copy data from orig/templates
    origdir = narps.dirs.dirs['orig']
    new_origdir = os.path.join(basedir, 'orig')
    templatedir = narps.dirs.dirs['templates']
    if verbose:
        print('using basedir:', basedir)
    if os.path.exists(basedir) and overwrite:
        shutil.rmtree(basedir)
    if not os.path.exists(basedir):
        os.mkdir(basedir)
    if not os.path.exists(new_origdir) or overwrite:
        if verbose:
            print('copying template data to new basedir')
        shutil.copytree(
            templatedir,
            os.path.join(basedir, 'templates'))
        if not os.path.exists(new_origdir):
            os.mkdir(new_origdir)
        # copy metadata files from orig
        for f in glob.glob(os.path.join(origdir, '*.*')):
            if os.path.isfile(f):
                if verbose:
                    print('copying', f, 'to', new_origdir)
                shutil.copy(f, new_origdir)
    else:
        print('using existing new basedir')

    return basedir
def log_data(download_dir, logfile, verbose=True):
    """record manifest and file hashes"""
    imgfiles = {}

    # traverse root directory, and list directories as dirs and files as files
    for root, _, files in os.walk(download_dir):
        path = root.split(os.sep)
        for file in files:
            if file.find('.nii.gz') < 0:
                # skip non-nifti files
                continue
            fname = os.path.join(root, file)
            filehash = hashlib.md5(open(fname, 'rb').read()).hexdigest()
            short_fname = os.path.join('/'.join(path[-2:]), file)
            imgfiles[short_fname] = filehash
            if verbose:
                print(short_fname, filehash)
            log_to_file(logfile, '%s %s' % (short_fname, filehash))
def create_mean_thresholded_images(self, datatype='resampled',
                                   overwrite=None, thresh=1e-5):
    """
    create overlap maps for thresholded images
    """
    log_to_file(
        self.dirs.logfile,
        sys._getframe().f_code.co_name,
        headspace=2)
    func_args = inspect.getargvalues(inspect.currentframe()).locals
    log_to_file(self.dirs.logfile, stringify_dict(func_args))

    imgtype = 'thresh'
    if overwrite is None:
        overwrite = self.overwrite
    output_dir = self.dirs.get_output_dir('overlap_binarized_thresh')
    concat_dir = self.dirs.get_output_dir(
        '%s_concat_%s' % (imgtype, datatype))

    for hyp in range(1, 10):
        outfile = os.path.join(output_dir, 'hypo%d.nii.gz' % hyp)
        if not os.path.exists(outfile) or overwrite:
            if self.verbose:
                print('%s - hypo %d: creating overlap file' % (imgtype, hyp))
            concat_file = os.path.join(concat_dir, 'hypo%d.nii.gz' % hyp)
            concat_img = nibabel.load(concat_file)
            concat_data = concat_img.get_data()
            # binarize and average across teams to get the overlap proportion
            concat_data = (concat_data > thresh).astype('float')
            concat_mean = numpy.mean(concat_data, 3)
            concat_mean_img = nibabel.Nifti1Image(
                concat_mean, affine=concat_img.affine)
            concat_mean_img.to_filename(outfile)
        else:
            if self.verbose:
                print('%s - hypo %d: using existing file' % (imgtype, hyp))
def copy_renamed_files(collectionIDs, download_dir, logfile):
    """change file names based on info in images.json"""
    # set up target directory
    orig_dir = os.path.join(os.path.dirname(download_dir), 'orig')
    if not os.path.exists(orig_dir):
        os.mkdir(orig_dir)

    for teamID in collectionIDs:
        collectionID = '%s_%s' % (collectionIDs[teamID], teamID)
        collection_dir = os.path.join(download_dir, collectionID)
        fixed_dir = os.path.join(orig_dir, collectionID)
        if not os.path.exists(fixed_dir):
            os.mkdir(fixed_dir)

        jsonfile = os.path.join(collection_dir, 'images.json')
        if not os.path.exists(jsonfile):
            print('no json file for ', collectionID)
            continue
        with open(jsonfile) as f:
            image_info = json.load(f)

        for img in image_info:
            origname = os.path.basename(img['file'])
            # fix various issues with names
            newname = img['name'].replace('tresh', 'thresh').replace(
                ' ', '_') + '.nii.gz'
            newname = newname.replace('hypo_', 'hypo').replace(
                'uthresh', 'unthresh').replace('_LR', '')
            # skip unthresh images if necessary
            if newname.find('unthresh') > -1 and \
                    teamID in TEAMS_TO_REMOVE_UNTHRESH:
                continue
            # skip subject-level files and files without 'thresh' in the name
            if origname.find('sub') > -1 or \
                    not newname.find('thresh') > -1:
                continue
            log_to_file(
                logfile,
                'copying %s/%s to %s/%s' %
                (collectionID, origname, collectionID, newname))
            shutil.copy(os.path.join(collection_dir, origname),
                        os.path.join(fixed_dir, newname))
    return orig_dir
def get_input_dirs(self, dirs, verbose=True, load_json=True):
    """
    get orig dirs
    - assumes that images.json is present for each valid dir
    """
    input_jsons = glob.glob(
        os.path.join(dirs.dirs['orig'], '*/images.json'))
    log_to_file(self.dirs.logfile,
                'found %d input directories' % len(input_jsons))

    if load_json:
        for i in input_jsons:
            collection_id = os.path.basename(os.path.dirname(i))
            NV_collection_id, teamID = collection_id.split('_')
            if teamID not in self.teams:
                self.teams[teamID] = NarpsTeam(
                    teamID, NV_collection_id, dirs, verbose=self.verbose)
                self.teams[teamID].jsonfile = i
                with open(i) as f:
                    self.teams[teamID].image_json = json.load(f)
def export_plugins(from_lang, plugin_list, plugin_selection=None):
    field_blacklist = get_blacklist()
    plugin_data = []

    for plugin in plugin_list:
        try:
            instance = plugin.get_plugin_instance()[0]
        except KeyError as e:
            # Nasty fix for StackPlugins still straying around
            if str(e) != "u'StackPlugin'":
                raise KeyError(str(e))
            continue

        if instance is None:
            continue
        elif plugin_selection and str(type(instance)) not in plugin_selection:
            continue

        if getattr(instance, "language") == from_lang:
            # TODO: check: could this break at some point?
            plugin_contents = plugin.get_plugin_instance()[0] \
                .get_translatable_content()
            if plugin_contents:
                if not isinstance(plugin_contents, list):
                    plugin_contents = [plugin_contents]
                for item in plugin_contents:
                    plugin_dict = {
                        'plugin_pk': getattr(plugin, "pk"),
                        'plugin_type': "%s%s" % (
                            instance.__class__.__name__,
                            " (%s)" % str(type(instance)) or ""),
                        'fields': {}
                    }
                    for key, value in item.items():
                        if key not in field_blacklist:
                            plugin_dict['fields'][key] = value
                    plugin_data.append(plugin_dict)

    if log_to_file_enabled():
        log_to_file(plugin_data)

    return plugin_data
def save_violation(v):
    try:
        file = open("./violations/violations.json", 'r')
        violations_json = json.loads(file.read())
        file.close()
        if v in violations_json.keys():
            violations_json[v] = int(violations_json.get(v)) + 1
            try:
                file = open("./violations/violations.json", 'w')
                file.write(json.dumps(violations_json))
                file.close()
            except:
                log_to_file("Failed to write to violations.json at " +
                            time.strftime("%b %d, %Y - %I:%M:%S"))
        else:
            violations_json[v] = 1
            try:
                file = open("./violations/violations.json", 'w')
                file.write(json.dumps(violations_json))
                file.close()
            except:
                log_to_file("Failed to write to violations.json at " +
                            time.strftime("%b %d, %Y - %I:%M:%S"))
    except:
        log_to_file("Failed to read violations.json at " +
                    time.strftime("%b %d, %Y - %I:%M:%S"))
        file = open("./violations/violations.json", 'w')
        file.write("{}")
        file.close()
def save_user(user):
    try:
        file = open("./users/users.json", 'r')
        users_json = json.loads(file.read())
        file.close()
        if user in users_json.keys():
            users_json[user] = int(users_json.get(user)) + 1
            try:
                file = open("./users/users.json", 'w')
                file.write(json.dumps(users_json))
                file.close()
            except:
                log_to_file("Failed to write to users.json at " +
                            time.strftime("%b %d, %Y - %I:%M:%S"))
        else:
            users_json[user] = 1
            try:
                file = open("./users/users.json", 'w')
                file.write(json.dumps(users_json))
                file.close()
            except:
                log_to_file("Failed to write to users.json at " +
                            time.strftime("%b %d, %Y - %I:%M:%S"))
    except:
        log_to_file("Failed to read users.json at " +
                    time.strftime("%b %d, %Y - %I:%M:%S"))
        file = open("./users/users.json", 'w')
        file.write("{}")
        file.close()
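# save_violation() and save_user() above duplicate the same read-increment-write
# pattern on a JSON counter file. A hedged refactor sketch follows; the helper
# name increment_counter() and its error handling are assumptions for
# illustration, not part of the original bot code.
import json
import time


def increment_counter(path, key):
    """Increment a named counter stored as JSON at `path`, resetting the
    counts if the file is missing or unreadable."""
    try:
        with open(path, 'r') as f:
            counts = json.load(f)
    except (OSError, ValueError):
        log_to_file("Failed to read %s at %s" %
                    (path, time.strftime("%b %d, %Y - %I:%M:%S")))
        counts = {}
    counts[key] = counts.get(key, 0) + 1
    try:
        with open(path, 'w') as f:
            json.dump(counts, f)
    except OSError:
        log_to_file("Failed to write to %s at %s" %
                    (path, time.strftime("%b %d, %Y - %I:%M:%S")))


# e.g. save_violation(v) would become:
#     increment_counter("./violations/violations.json", v)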
def get_orig_data(self):
    """
    download original data from repository
    """
    log_to_file(self.logfile, '\n\nget_orig_data')
    log_to_file(self.logfile, 'DATA_URL: %s' % DATA_URL)
    MAX_TRIES = 5

    if self.data_url is None:
        print('no URL for original data, cannot download')
        print('should be specified in info.json')
        return

    print('orig data do not exist, downloading...')
    output_directory = self.dirs['base']
    no_dl = True
    ntries = 0
    # try several times in case of http error
    while no_dl:
        try:
            filename = wget.download(self.data_url, out=output_directory)
            no_dl = False
        except HTTPError:
            ntries += 1
            time.sleep(1)  # wait a second
            if ntries > MAX_TRIES:
                raise Exception('Problem downloading original data')

    # save a hash of the tarball for data integrity
    filehash = hashlib.md5(open(filename, 'rb').read()).hexdigest()
    log_to_file(self.logfile, 'hash of tar file: %s' % filehash)

    tarfile_obj = tarfile.open(filename)
    tarfile_obj.extractall(path=self.dirs['base'])
    os.remove(filename)
def parse_comment(c):
    try:
        file = open("./data-analyzation/words.json", 'r')
        words_json = json.loads(file.read())
        file.close()
        print("Reading comment " + c.id + " at " +
              time.strftime("%b %d, %Y - %I:%M:%S") + " by " + str(c.author))
        body = str(c.body.encode('utf-8'))
        for w in body.split(' '):
            word = str(w.encode('utf-8'))
            # skip links and user/subreddit mentions
            if "http" in word or "/u/" in word or "/r/" in word \
                    or "\\" in word:
                continue
            for ch in replace_chars:
                word = word.replace(ch, '')
            if word != "" and word != " " and word != "'" \
                    and word != "," and word != '\n' and word[0] != "'":
                if word in words_json.keys():
                    words_json[word] = int(words_json.get(word)) + 1
                else:
                    words_json[word] = 1
        file = open("./data-analyzation/words.json", 'w')
        file.write(json.dumps(words_json))
        file.close()
    except:
        log_to_file("Failed to read words.json at " +
                    time.strftime("%b %d, %Y - %I:%M:%S"))
    return
def get_quote_view(request, pk):
    t = TranslationRequest.objects.get(pk=pk)
    check_stage(t.status, 'selected_content')

    if request.method == 'POST':
        if request.POST.get('opt'):
            t.order_choice = request.POST.get('opt')  # TODO: possible security issue?
            t.status = 'selected_quote'
            t.save()
        return HttpResponseRedirect(reverse('admin:order', kwargs={'pk': pk}))
    else:
        data = prepare_data(t, t.from_lang, t.to_lang)
        quote = get_quote(t.provider, data=data)

        if log_to_file_enabled():
            log_to_file(data)

        if t.provider == 'supertext':
            res = json.loads(quote)
            return render_to_response(
                'aldryn_translator/quote.html',
                {'res': res, 'dev': is_dev()},
                context_instance=RequestContext(request))
        else:
            raise NotImplementedError()
category = sys.argv[2]
test_type = sys.argv[3]
url = sys.argv[4]

status_code = None
result = None
content = None
request_body = None

if category == 'web':
    if test_type == 'status_code':
        status_code = sys.argv[5]
        result = verify_web_status_code(url, status_code)
    elif test_type == 'content':
        content = sys.argv[5]
        result = verify_web_content(url, content)
    else:
        utils.log_to_file('test type invalid')
elif category == 'service':
    request_body = sys.argv[6]
    if test_type == 'status_code':
        status_code = sys.argv[5]
        result = verify_service_status_code(url, request_body, status_code)
    elif test_type == 'content':
        content = sys.argv[5]
        result = verify_service_content(url, request_body, content)
else:
    utils.log_to_file('category is invalid')

# write result to respective results file
if result is not None:
    write_result(task_id, result)
    if result == 0: