def submit(data_file, id_tracking_file=node_tracking_file):
    """Submit every record of ``data_file`` as a node under its parent.

    For each record returned by ``load_data(data_file)``:

    * the parent node ID is looked up in ``id_tracking_file`` from the
      record's ``rand_patient_id``; if none is found an error is logged
      and the record is skipped;
    * the node is loaded by ``visit_id`` using the record's
      ``DCC_VISIT_IDS`` value and handed to ``validate_record``;
    * when ``validate_record`` returns a truthy result, a tracking row is
      built from it, and rows for nodes that were new (empty ``visit_id``
      right after loading) are written to ``id_tracking_file``.

    Args:
        data_file: data file to read records from; also passed to
            ``get_field_header`` and ``write_csv_headers``.
        id_tracking_file: tracking file used for parent lookups and for
            recording newly created nodes.

    Raises:
        Exception: any error raised while handling a record is logged
            with its traceback and re-raised unchanged, aborting the run.
    """
    log.info('Starting submission of %ss.', node_type)
    # NOTE(review): rows are collected here but the visible code never
    # returns or reads them.
    nodes = []
    csv_fieldnames = get_field_header(data_file)
    write_csv_headers(data_file, fieldnames=csv_fieldnames)

    # Node-specific setting: attribute used to look the node up.
    load_search_field = 'visit_id'

    # NOTE: an earlier per-record filter on 'consented' / 'visit_number'
    # ('UNK' was a workaround for an unreconciled visit list) is disabled.
    for record in load_data(data_file):
        log.info('\n...next record...')
        try:
            log.debug('data record: %s', record)

            internal_id = record['DCC_VISIT_IDS']
            # Text ID used to find the parent and get back its node ID.
            parent_internal_id = record['rand_patient_id']

            parent_id = get_parent_node_id(
                id_tracking_file, parent_type, parent_internal_id)
            if not parent_id:
                log.error('No parent_id found for %s', parent_internal_id)
                continue

            node = load(internal_id, load_search_field)
            # Decide "new" before validate_record touches the node.
            node_is_new = not getattr(node, load_search_field)
            if node_is_new:
                log.debug('loaded node newbie...')

            saved = validate_record(parent_id, node, record,
                                    data_file_name=data_file)
            if not saved:
                continue

            header = settings.node_id_tracking.id_fields
            saved_name = getattr(saved, load_search_field)
            vals = values_to_node_dict(
                [[node_type.lower(), saved_name, saved.id,
                  parent_type.lower(), parent_internal_id, parent_id,
                  get_cur_datetime()]],
                header)
            nodes.append(vals)
            if node_is_new:
                write_out_csv(
                    id_tracking_file,
                    fieldnames=get_field_header(id_tracking_file),
                    values=vals)
        except Exception as e:
            log.exception(e)
            # Bare raise keeps the original traceback intact.
            raise
def submit(data_file, id_tracking_file=node_tracking_file):
    """Submit every record of ``data_file`` that has a visit ID.

    Records whose ``DCC_VISIT_IDS`` value is empty are not submitted;
    they are written to ``data_file + '_records_no_submit.csv'`` instead.

    For every other record:

    * the parent node ID is looked up in ``id_tracking_file`` from the
      record's ``DCC_VISIT_IDS``; if none is found an error is logged and
      the record is skipped;
    * the node is loaded by ``name`` using the record's
      ``sample_name_id`` and handed to ``validate_record``;
    * when ``validate_record`` returns a truthy result, a tracking row is
      built from it, and rows for nodes that were new (empty ``name``
      right after loading) are written to ``id_tracking_file``.

    Args:
        data_file: data file to read records from (a string path, since
            a suffix is appended to it for the no-submit file).
        id_tracking_file: tracking file used for parent lookups and for
            recording newly created nodes.

    Raises:
        Exception: any error raised while handling a record is logged
            with its traceback and re-raised unchanged, aborting the run.
    """
    log.info('Starting submission of %ss.', node_type)
    # NOTE(review): rows are collected here but the visible code never
    # returns or reads them.
    nodes = []

    # Node-specific setting: attribute used to look the node up.
    load_search_field = 'name'

    for record in load_data(data_file):
        # Skip (but keep a copy of) records with missing visit info.
        if not record['DCC_VISIT_IDS']:
            write_out_csv(data_file+'_records_no_submit.csv',
                          fieldnames=record.keys(), values=[record,])
            continue

        log.debug('\n...next record...')
        try:
            log.debug('data record: %s', record)

            internal_id = record['sample_name_id']
            parent_internal_id = record['DCC_VISIT_IDS']

            parent_id = get_parent_node_id(
                id_tracking_file, parent_type, parent_internal_id)
            log.debug('matched parent_id: %s', parent_id)
            if not parent_id:
                log.error('No parent_id found for %s', parent_internal_id)
                continue

            node = load(internal_id, load_search_field)
            # Decide "new" before validate_record touches the node.
            node_is_new = not getattr(node, load_search_field)
            if node_is_new:
                log.debug('loaded node newbie...')

            saved = validate_record(parent_id, node, record,
                                    data_file_name=data_file)
            if not saved:
                continue

            header = settings.node_id_tracking.id_fields
            saved_name = getattr(saved, load_search_field)
            vals = values_to_node_dict(
                [[node_type.lower(), saved_name, saved.id,
                  parent_type.lower(), parent_internal_id, parent_id,
                  get_cur_datetime()]],
                header)
            nodes.append(vals)
            if node_is_new:
                write_out_csv(
                    id_tracking_file,
                    fieldnames=get_field_header(id_tracking_file),
                    values=vals)
        except Exception as e:
            log.exception(e)
            # Bare raise keeps the original traceback intact.
            raise
def submit(data_file, id_tracking_file=node_tracking_file):
    """Submit every record of ``data_file`` as a file node under its prep.

    For each record returned by ``load_data(data_file)``:

    * the parent node ID is looked up in ``id_tracking_file`` from the
      record's ``prep_id``; if none is found an error is logged and the
      record is skipped;
    * the node is loaded by ``local_file`` using the base name of the
      record's ``local_file`` path and handed to ``validate_record``;
    * when ``validate_record`` returns a truthy result, a tracking row is
      built.  Its name is the base name of the saved node's
      ``local_file`` when the record's ``consented`` value is ``'YES'``,
      otherwise the saved node's ``comment`` joined to ``private_file``
      with a dash;
    * rows for nodes that were new (empty ``local_file`` right after
      loading) are written to ``id_tracking_file``.

    Args:
        data_file: data file to read records from; also passed to
            ``get_field_header`` and ``write_csv_headers``.
        id_tracking_file: tracking file used for parent lookups and for
            recording newly created nodes.

    Raises:
        Exception: any error raised while handling a record is logged
            with its traceback and re-raised unchanged, aborting the run.
    """
    log.info('Starting submission of %ss.', node_type)
    # NOTE(review): rows are collected here but the visible code never
    # returns or reads them.
    nodes = []
    csv_fieldnames = get_field_header(data_file)
    write_csv_headers(data_file, fieldnames=csv_fieldnames)

    # Node-specific setting: record column and node attribute holding
    # the file path.
    load_search_field = 'local_file'

    for record in load_data(data_file):
        log.info('...next record...')
        try:
            log.debug('data record: %s', record)

            internal_id = os.path.basename(record[load_search_field])
            parent_internal_id = record['prep_id']

            parent_id = get_parent_node_id(
                id_tracking_file, parent_type, parent_internal_id)
            log.debug('matched parent_id: %s', parent_id)
            if not parent_id:
                log.error('No parent_id found for %s', parent_internal_id)
                continue

            node = load(internal_id, load_search_field)
            # Decide "new" before validate_record touches the node.
            node_is_new = not getattr(node, load_search_field)
            if node_is_new:
                log.debug('loaded node newbie...')

            saved = validate_record(parent_id, node, record,
                                    data_file_name=data_file)
            if not saved:
                continue

            header = settings.node_id_tracking.id_fields
            if record['consented'] == 'YES':
                saved_name = os.path.basename(
                    getattr(saved, load_search_field))
            else:
                # Non-consented records are tracked by comment rather
                # than by file name.
                saved_name = '-'.join(
                    [getattr(saved, 'comment'), 'private_file'])
            vals = values_to_node_dict(
                [[node_type.lower(), saved_name, saved.id,
                  parent_type.lower(), parent_internal_id, parent_id,
                  get_cur_datetime()]],
                header)
            nodes.append(vals)
            if node_is_new:
                write_out_csv(
                    id_tracking_file,
                    fieldnames=get_field_header(id_tracking_file),
                    values=vals)
        except Exception as e:
            log.exception(e)
            # Bare raise keeps the original traceback intact.
            raise
def submit(data_file, id_tracking_file=node_tracking_file):
    """Submit every record of ``data_file`` under its host seq prep.

    For each record returned by ``load_data(data_file)``:

    * the parent node ID is looked up in ``id_tracking_file`` from the
      record's ``host_seq_prep_name_id``; if none is found an error is
      logged and the record is skipped;
    * the node is loaded by ``comment`` using the record's
      ``host_transcriptomics_id`` with ``.host_transcriptomics``
      appended, and handed to ``validate_record``;
    * when ``validate_record`` returns a truthy result, a tracking row is
      built from it, and rows for nodes that were new (empty ``comment``
      right after loading) are written to ``id_tracking_file``.

    Args:
        data_file: data file to read records from; also passed to
            ``get_field_header`` and ``write_csv_headers``.
        id_tracking_file: tracking file used for parent lookups and for
            recording newly created nodes.

    Raises:
        Exception: any error raised while handling a record is logged
            with its traceback and re-raised unchanged, aborting the run.
    """
    log.info('Starting submission of %ss.', node_type)
    # NOTE(review): rows are collected here but the visible code never
    # returns or reads them.
    nodes = []
    csv_fieldnames = get_field_header(data_file)
    write_csv_headers(data_file, fieldnames=csv_fieldnames)

    # Node-specific setting: attribute used to look the node up.
    load_search_field = 'comment'

    for record in load_data(data_file):
        log.info('...next record...')
        try:
            log.debug('data record: %s', record)

            internal_id = (str(record['host_transcriptomics_id'])
                           + '.host_transcriptomics')
            # Link to the host seq prep ID.
            parent_internal_id = record['host_seq_prep_name_id']

            parent_id = get_parent_node_id(
                id_tracking_file, parent_type, parent_internal_id)
            log.debug('matched parent_id: %s', parent_id)
            if not parent_id:
                log.error('No parent_id found for %s', parent_internal_id)
                continue

            node = load(internal_id, load_search_field)
            # Decide "new" before validate_record touches the node.
            node_is_new = not getattr(node, load_search_field)
            if node_is_new:
                log.debug('loaded node newbie...')

            saved = validate_record(parent_id, node, record,
                                    data_file_name=data_file)
            if not saved:
                continue

            header = settings.node_id_tracking.id_fields
            saved_name = getattr(saved, load_search_field)
            # NOTE(review): this row carries no timestamp column; confirm
            # it matches the id_fields header in use.
            vals = values_to_node_dict(
                [[node_type.lower(), saved_name, saved.id,
                  parent_type.lower(), parent_internal_id, parent_id]],
                header)
            nodes.append(vals)
            if node_is_new:
                write_out_csv(
                    id_tracking_file,
                    fieldnames=get_field_header(id_tracking_file),
                    values=vals)
        except Exception as e:
            log.exception(e)
            # Bare raise keeps the original traceback intact.
            raise
def submit(data_file, id_tracking_file=node_tracking_file):
    """Submit every record of ``data_file`` as a proteome-keyed node.

    For each record returned by ``load_data(data_file)``:

    * the parent node ID is looked up in ``id_tracking_file`` from the
      record's ``sample_name_id`` with ``.hostassayprep`` appended;
    * the node is loaded by ``comment`` using the record's
      ``sample_name_id`` with ``.proteome`` appended, and handed to
      ``validate_record``;
    * when ``validate_record`` returns a truthy result, a tracking row is
      built from it, and rows for nodes that were new (empty ``comment``
      right after loading) are written to ``id_tracking_file``.

    Args:
        data_file: data file to read records from; also passed to
            ``get_field_header`` and ``write_csv_headers``.
        id_tracking_file: tracking file used for parent lookups and for
            recording newly created nodes.

    Raises:
        Exception: any error raised while handling a record is logged
            with its traceback and re-raised unchanged, aborting the run.
    """
    log.info('Starting submission of %ss.', node_type)
    # NOTE(review): rows are collected here but the visible code never
    # returns or reads them.
    nodes = []
    csv_fieldnames = get_field_header(data_file)
    write_csv_headers(data_file, fieldnames=csv_fieldnames)

    # Node-specific setting: attribute used to look the node up.
    load_search_field = 'comment'

    for record in load_data(data_file):
        log.info('\n...next record...')
        try:
            log.debug('data record: %s', record)

            sample_name = record['sample_name_id']
            internal_id = sample_name + '.proteome'
            parent_internal_id = sample_name + '.hostassayprep'

            # NOTE(review): parent_id is passed on without checking that
            # a parent was actually found; confirm this is intended.
            parent_id = get_parent_node_id(
                id_tracking_file, parent_type, parent_internal_id)

            node = load(internal_id, load_search_field)
            # Decide "new" before validate_record touches the node.
            node_is_new = not getattr(node, load_search_field)
            if node_is_new:
                log.debug('loaded node newbie...')

            saved = validate_record(parent_id, node, record,
                                    data_file_name=data_file)
            if not saved:
                continue

            header = settings.node_id_tracking.id_fields
            saved_name = getattr(saved, load_search_field)
            # NOTE(review): this row carries no timestamp column; confirm
            # it matches the id_fields header in use.
            vals = values_to_node_dict(
                [[node_type.lower(), saved_name, saved.id,
                  parent_type.lower(), parent_internal_id, parent_id]],
                header)
            nodes.append(vals)
            if node_is_new:
                write_out_csv(
                    id_tracking_file,
                    fieldnames=get_field_header(id_tracking_file),
                    values=vals)
        except Exception as e:
            log.exception(e)
            # Bare raise keeps the original traceback intact.
            raise
def submit(data_file, id_tracking_file=node_tracking_file):
    """Submit every record of ``data_file`` that names a local file.

    Records whose ``local_file`` value is the empty string are skipped.
    For every other record:

    * the parent node ID is looked up in ``id_tracking_file`` from the
      record's ``raw_file_id``;
    * the node is loaded by ``local_file`` using the base name of the
      record's ``local_file`` path and handed to ``validate_record``;
    * when ``validate_record`` returns a truthy result, a tracking row is
      built from it, and rows for nodes that were new (empty
      ``local_file`` right after loading) are written to
      ``id_tracking_file``.

    Args:
        data_file: data file to read records from; also passed to
            ``get_field_header`` and ``write_csv_headers``.
        id_tracking_file: tracking file used for parent lookups and for
            recording newly created nodes.

    Raises:
        Exception: any error raised while handling a record is logged
            with its traceback and re-raised unchanged, aborting the run.
    """
    log.info('Starting submission of %ss.', node_type)
    # NOTE(review): rows are collected here but the visible code never
    # returns or reads them.
    nodes = []
    csv_fieldnames = get_field_header(data_file)
    write_csv_headers(data_file, fieldnames=csv_fieldnames)

    # Node-specific setting: attribute used to look the node up.
    load_search_field = 'local_file'

    for record in load_data(data_file):
        log.info('\n...next record...')
        try:
            log.debug('data record: %s', record)

            if record['local_file'] == '':
                # Nothing to submit for records without a file.
                continue

            internal_id = os.path.basename(record['local_file'])
            parent_internal_id = record['raw_file_id']

            # NOTE(review): parent_id is passed on without checking that
            # a parent was actually found; confirm this is intended.
            parent_id = get_parent_node_id(
                id_tracking_file, parent_type, parent_internal_id)

            node = load(internal_id, load_search_field)
            # Decide "new" before validate_record touches the node.
            node_is_new = not getattr(node, load_search_field)
            if node_is_new:
                log.debug('loaded node newbie...')

            saved = validate_record(parent_id, node, record,
                                    data_file_name=data_file)
            if not saved:
                continue

            header = settings.node_id_tracking.id_fields
            # saved_name was previously used without ever being assigned,
            # which raised NameError on the first saved record.  The base
            # name of the saved file path has the same form as
            # internal_id.
            # NOTE(review): confirm this is the name later lookups expect.
            saved_name = os.path.basename(
                getattr(saved, load_search_field))
            # NOTE(review): this row carries no timestamp column; confirm
            # it matches the id_fields header in use.
            vals = values_to_node_dict(
                [[node_type.lower(), saved_name, saved.id,
                  parent_type.lower(), parent_internal_id, parent_id]],
                header)
            nodes.append(vals)
            if node_is_new:
                write_out_csv(
                    id_tracking_file,
                    fieldnames=get_field_header(id_tracking_file),
                    values=vals)
        except Exception as e:
            log.exception(e)
            # Bare raise keeps the original traceback intact.
            raise