def import_file(name, time_row=0, censor_row=1, features_rows=None):
    """Load a comma-separated cohort file and split it into labelled pieces.

    The file is read with ``np.genfromtxt`` (comma delimiter, ``dtype=None``,
    lines starting with ``!`` treated as comments).  The first column holds
    the row headers; the row whose header is ``'patient'`` or ``'ID_REF'``
    separates the metadata rows from the gene rows that follow it.

    Args:
        name: File path (or anything else ``np.genfromtxt`` accepts).
        time_row: Forwarded to ``get_time_and_censor`` and echoed back in the
            result as ``'time_row_num'``.
        censor_row: Forwarded to ``get_time_and_censor`` and echoed back in
            the result as ``'censor_row_num'``.
        features_rows: Forwarded to ``get_features``; ``None`` by default.

    Returns:
        A dict with the keys ``'patient_values'``, ``'survival_time'``,
        ``'survival_censor'``, ``'time_row_num'``, ``'censor_row_num'``,
        ``'metadata_row_names'``, ``'gene_names'``,
        ``'metadata_feature_names'`` and ``'metadata_features'``.
    """
    cohort = np.genfromtxt(name, delimiter=',', dtype=None,
                           filling_values='', comments="!")
    survival_time, survival_censor = get_time_and_censor(
        cohort, time_row, censor_row)

    row_headers = list(cohort[:, 0])
    if 'patient' in row_headers:
        patient_row_idx = row_headers.index('patient')
    elif 'ID_REF' in row_headers:
        patient_row_idx = row_headers.index('ID_REF')
    else:
        print('Error: \'patient\' or \'ID_REF\' header not found')
        # NOTE(review): presumably usage() terminates the program; if it
        # returns, patient_row_idx is unbound below -- confirm.
        help_message.usage()

    # Note: these return *all* the metadata rows if features_rows is None,
    # or the selected ones if features_rows is set.
    features, feature_names = get_features(
        features_rows, patient_row_idx, cohort)
    all_metadata_row_names = get_feature_row_names(patient_row_idx, cohort)

    # Everything below the patient/ID_REF row: first column is the gene name,
    # the remaining columns are the per-patient values.
    gene_names = row_headers[patient_row_idx + 1:]
    patient_value = cohort[patient_row_idx + 1:, 1:]
    patient_value = clear_blanks(patient_value)
    # np.float was only an alias of the builtin float and no longer exists in
    # current NumPy releases; the builtin yields the same float64 dtype.
    patient_value = patient_value.astype(float)

    input_data = {
        'patient_values': patient_value,
        'survival_time': survival_time,
        'survival_censor': survival_censor,
        'time_row_num': time_row,
        'censor_row_num': censor_row,
        'metadata_row_names': all_metadata_row_names,
        'gene_names': gene_names,
        'metadata_feature_names': feature_names,
        'metadata_features': features,
    }
    return input_data
def get_row_titles(name):
    """Return the first-column titles of the rows before the patient row.

    The file is scanned line by line.  Lines whose first comma-separated
    field starts with ``!`` are skipped.  Scanning stops at the first line
    whose first field is exactly ``'patient'`` or ``'ID_REF'``.  If neither
    has been seen once the zero-based line index exceeds 50, a message is
    printed and ``help_message.usage()`` is called.

    Args:
        name: Path of the comma-separated file to scan.

    Returns:
        A list with the first field of every line kept before the stop row,
        in file order.
    """
    import sys

    # 'U' (universal newlines) only matters on Python 2; Python 3 text mode
    # already translates newlines and rejects the flag from 3.11 onwards.
    mode = 'rU' if sys.version_info[0] < 3 else 'r'

    row_titles = []
    with open(name, mode) as f:
        # Iterate lazily: the loop normally stops within the first few
        # lines, so there is no need to read the whole file up front.
        for i, line in enumerate(f):
            row_title = line.split(',')[0]
            # startswith() is safe on an empty first field, where indexing
            # [0] would raise IndexError.
            if row_title.startswith('!'):
                continue
            if row_title == 'patient' or row_title == 'ID_REF':
                break
            elif i > 50:
                print("'patient' nor 'ID_REF' found. Aborting.")
                help_message.usage()
            else:
                row_titles.append(row_title)
    return row_titles
def get_options(argv):
    """Check that ``argv`` parses against the supported command-line options.

    Only validation happens here: ``argv[1:]`` is run through
    ``getopt.getopt`` and ``help_message.usage()`` is called when parsing
    fails.  Nothing is returned.  A fuller definition of ``get_options``
    follows later in this file and rebinds the name.

    Args:
        argv: Full argument vector; element 0 is skipped.
    """
    try:
        getopt.getopt(argv[1:], 'ho:i:vm:',
                      ['help', 'input=', 'output-directory=',
                       'multivariates=', 'interactive'])
    except getopt.error:
        help_message.usage()
def get_options(argv):
    """Parse the command line into the settings used by the program.

    Recognised options: ``-h/--help``, ``-i/--input FILE``,
    ``-o/--output-directory DIR``, ``-m/--multivariates A,B,...``,
    ``--interactive`` and ``-v`` (accepted, currently without effect).
    ``help_message.usage()`` is called when parsing fails, when help is
    requested, or when no input file was given.

    Args:
        argv: Full argument vector; element 0 is skipped.

    Returns:
        A tuple ``(infile, outdir, multivariates, interactive)``: the input
        path, the output directory (``'.'`` by default), the list obtained by
        splitting the ``-m`` value on commas (empty by default), and whether
        ``--interactive`` was given.
    """
    try:
        opts, _args = getopt.getopt(
            argv[1:], 'ho:i:vm:',
            ['help', 'input=', 'output-directory=',
             'multivariates=', 'interactive'])
    except getopt.error:
        # NOTE(review): presumably usage() terminates the program; if it
        # returns, opts is unbound below -- confirm.
        help_message.usage()

    infile = None
    outdir = '.'
    multivariates = []
    for option, value in opts:
        if option in ('-h', '--help'):
            help_message.usage()
        elif option in ('-i', '--input'):
            infile = value
        elif option in ('-o', '--output-directory'):
            outdir = value
        # TODO(joans): process this by looking up row numbers so that
        # all the weird complexity of metadata_features sometimes having
        # all the features can go away
        elif option in ('-m', '--multivariates'):
            # getopt reports the long option under the spelling registered
            # above, so that is the spelling that has to be matched here.
            multivariates = value.split(',')

    if not infile:
        help_message.usage()

    # A flag without an argument is reported by getopt with an empty value.
    interactive = ('--interactive', '') in opts
    return infile, outdir, multivariates, interactive