def write_scantsv(bids_dir, dicom_dir=None, live=False):
    """Make subject scan files (needed for NDA submission).

    For each subject, reads the acquisition date from one of the subject's
    dicoms and writes a BIDS ``*_scans.tsv`` listing every nifti with that
    date. With live=False, nothing is written (dry run).
    """
    # TODO: improved with metadata
    # BUGFIX: os.path.exists(None) raises TypeError; guard the default.
    if dicom_dir is None or not os.path.exists(dicom_dir):
        logging.warning('Specify valid dicom directory with [-d] flag')
        return
    layout = BIDSLayout(bids_dir)
    subs = sorted(layout.get_subjects())
    for sid in subs:
        # Any dicom of the subject carries the acquisition date
        dcm = read_file(glob(op(dicom_dir, '*' + sid, '*'))[-1],
                        force=True).AcquisitionDate
        date = '-'.join([dcm[:4], dcm[4:6], dcm[6:]])  # YYYYMMDD -> YYYY-MM-DD
        logging.info("{0}'s scan date: {1}".format(sid, date))
        scans = []
        outname = None
        for scan in [f.filename for f in
                     layout.get(subject=sid, extensions=['nii', 'nii.gz'])]:
            paths = scan.split(os.sep)
            # keep only "<modality-dir>/<filename>" relative path
            scans.append(os.sep.join(paths[-2:]))
            outname = op(bids_dir, paths[-3], paths[-3] + '_scans.tsv')
        if outname is None:
            # BUGFIX: original raised NameError on `outname` when a subject
            # had no nifti files; skip such subjects instead.
            logging.warning('No scans found for {0}'.format(sid))
            continue
        if live:
            with open(outname, 'wt') as tsvfile:
                writer = csv.writer(tsvfile, delimiter='\t')
                writer.writerow(['filename', 'acq_time'])
                for scan in sorted(scans):
                    writer.writerow([scan, date])
            logging.info('Wrote {0}'.format(outname))
def read_data(test=False):
    """Read the raw train (and optionally test) data.

    Parameters
    ----------
    test : bool
        When True, also load and return the test set.

    Returns
    -------
    df_train, or (df_train, df_test) when test is True.
    """
    df_train = pd.read_csv(op(PATH_RAW, 'train.csv'), sep=';')
    if test:
        # only pay the cost of reading test.csv when it is actually wanted
        # (original loaded it unconditionally and discarded it)
        df_test = pd.read_csv(op(PATH_RAW, 'test.csv'), sep=';')
        return df_train, df_test
    return df_train
def split_geojson(gdf, outdir, groupby_col='type_id', remaps=None):
    """Split a GeoDataFrame into one GeoJSON file per group.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Features to split.
    outdir : str
        Directory receiving one "<group>.geojson" file per group.
    groupby_col : str
        Column used to group the features.
    remaps : dict, optional
        Mapping applied to ``groupby_col`` values before grouping;
        values without a mapping are kept unchanged.
    """
    if remaps is not None:  # idiom fix: was `remaps == None`
        # relabel group values, falling back to the original where unmapped
        gdf[groupby_col] = gdf[groupby_col].map(remaps).fillna(gdf[groupby_col])
    # single write loop (original duplicated it verbatim in both branches)
    for name, group in gdf.groupby([groupby_col]):
        group.to_file(op(outdir, "{}.geojson".format(name)), driver="GeoJSON")
def add_sub(data_dir, subjpre, live=False):
    """Add BIDS sub- prefix to subjects converted with heudiconv"""
    if not subjpre:
        sys.exit('Specify subject prefix with [-p] flag')
    candidates = (entry for entry in os.listdir(data_dir)
                  if subjpre in entry and 'sub-' not in entry)
    subjs = sorted(candidates)
    for name in subjs:
        src = op(data_dir, name)
        dst = op(data_dir, 'sub-' + name)
        logging.info(msg.format(src, dst))
        if live:
            os.rename(src, dst)
    return subjs
def image_intercept(bbox_geojson, image_folder, out_folder, image_id_field,
                    create_subset=False):
    """Record the images referenced by the bounding boxes in *bbox_geojson*.

    Writes "<geojson-basename>_images.csv" (one image id per row) into
    ``out_folder``. When ``create_subset`` is True, the referenced images
    are also copied from ``image_folder`` into a "subset" directory under
    ``out_folder``.
    """
    gdf = gpd.read_file(bbox_geojson)
    unique = gdf[image_id_field].unique()
    if create_subset:
        # BUGFIX: original iterated the characters of the *field name*
        # (`for i in image_id_field`) and referenced an undefined
        # `subset_folder` (NameError). NOTE(review): destination assumed to
        # be out_folder/subset — confirm against original intent.
        subset_folder = op(out_folder, 'subset')
        os.makedirs(subset_folder, exist_ok=True)
        for image_id in unique:
            copyfile(op(image_folder, image_id), op(subset_folder, image_id))
    csv_path = op(out_folder, sp(base(bbox_geojson))[0] + "_images.csv")
    with open(csv_path, 'w') as f:
        writer = csv.writer(f)
        writer.writerows(zip(unique))
def get_fasttext():
    """Load the pretrained French fastText vectors.

    https://fasttext.cc/docs/en/pretrained-vectors.html
    """
    model_path = op(PATH_EXTERNAL, 'wiki.fr.bin')
    return KeyedVectors.load_word2vec_format(model_path, binary=True)
def drop_underscore(data_dir, live=False):
    """Remove underscore from subject id.

    Renames subject directories first, then every file whose path contains
    the old id. With live=False, only logs the planned renames (dry run).
    """
    # Change directories first, then files
    subjs = sorted(x for x in os.listdir(data_dir) if x.startswith('sub-'))
    for subj in subjs:
        if subj.count('_') == 0:
            continue
        corr = subj.replace('_', '')
        logging.info(msg.format(op(data_dir, subj), op(data_dir, corr)))
        if not live:
            # BUGFIX: original `return`ed here, so a dry run logged only the
            # first underscored subject; continue to log the full plan.
            continue
        os.rename(op(data_dir, subj), op(data_dir, corr))
        # refresh after each rename so the layout reflects the new paths
        layout = BIDSLayout(data_dir)
        files = [f.filename for f in layout.get() if subj in f.filename]
        for file in files:
            fix = file.replace(subj, corr)
            os.rename(file, fix)
def get_stopwords():
    """Return the French stopword list, one word per line in the file."""
    stopwords_path = op(PATH_EXTERNAL, 'fr-stopwords.txt')
    with open(stopwords_path) as handle:
        return handle.read().splitlines()
def main():
    """Command-line entry point: parse arguments and run BIDS fix-ups."""
    class MyParser(argparse.ArgumentParser):  # to run from commandline
        # print full usage (not just the error) when parsing fails
        def error(self, message):
            sys.stderr.write('error: %s\n' % message)
            self.print_help()
            sys.exit(2)

    # BUGFIX: MyParser was defined but argparse.ArgumentParser was
    # instantiated instead, so the error() override never ran. Also removed
    # the unused local `defstr`.
    parser = MyParser(prog='makebids.py', description=__doc__)
    parser.add_argument('datadir', help='''bids-like directory''')
    parser.add_argument('-p', dest='pre', type=str,
                        help='''identifier across all subjects''')
    parser.add_argument('-d', '--dicoms', type=str, default=None,
                        help="""dicom directory""")
    parser.add_argument('--live', default=False, action='store_true',
                        help="""WARNING: DON'T INCLUDE ON FIRST PASS""")
    parser.add_argument('--full', action='store_true', default=False,
                        help="""Run through each option""")
    parser.add_argument('-q', '--quiet', action='store_true', default=False,
                        help="""Make the python logger only log errors""")
    args = parser.parse_args()

    bids_dir = os.path.abspath(args.datadir)
    if not os.path.exists(bids_dir):
        sys.exit('Specify valid BIDS data directory')
    dicom_dir = os.path.abspath(args.dicoms) if args.dicoms else None

    loglevel = logging.WARNING if args.quiet else logging.INFO
    # Set logging output
    logging.basicConfig(filename=op(os.getcwd(), 'mbOUT.txt'),
                        format='%(asctime)s %(levelname)s:%(message)s',
                        level=loglevel)

    # rebuild the layout when files are renamed
    refresh = lambda x=bids_dir: BIDSLayout(x)

    if args.full:
        add_sub(bids_dir, args.pre, args.live)
        drop_underscore(bids_dir, args.live)
        # using BIDS grabbids after renaming files
        if dicom_dir:
            write_scantsv(bids_dir, dicom_dir, args.live)
        # set layout once no more file renamings
        add_taskname(refresh(), args.live)
        fix_fieldmaps(refresh(), args.live)
    else:
        choice = int(input(OPTIONS))
        if choice == 1:
            add_sub(bids_dir, args.pre, args.live)
        elif choice == 2:
            drop_underscore(bids_dir, args.live)
        elif choice == 3:
            write_scantsv(bids_dir, dicom_dir, args.live)
        elif choice == 4:
            add_taskname(refresh(), args.live)
        elif choice == 5:
            fix_fieldmaps(refresh(), args.live)
        else:
            sys.exit('Option not recognized')