def submit(parent_name, parent_id, data_file, id_tracking_file=node_tracking_file):
    """Submit subject records loaded from data_file under the given parent.

    Args:
        parent_name: human-readable name of the parent node.
        parent_id: OSDF id of the parent node.
        data_file: CSV file of subject records to submit.
        id_tracking_file: CSV file recording ids of submitted nodes.

    Raises:
        Exception: re-raised after logging when any record fails.
    """
    log.info('Starting submission of subjects.')
    nodes = []
    for record in load_data(data_file):
        try:
            log.debug('...trying next record...')
            n = load(record['rand_subject_id'])
            # Only submit nodes not already present (no id loaded back).
            if not n.rand_subject_id:
                saved = validate_record(parent_id, n, record)
                if saved:
                    header = settings.node_id_tracking.id_fields
                    # BUGFIX: 'header' was computed but never passed to
                    # values_to_node_dict, unlike every sibling submit().
                    vals = values_to_node_dict(
                        [[node_type, saved.rand_subject_id, saved.id,
                          parent_type, parent_name, parent_id,
                          get_cur_datetime()]],
                        header
                    )
                    write_out_csv(id_tracking_file, values=vals)
                    nodes.append(vals)
        except Exception as e:
            # log.exception records the traceback (as the sibling
            # functions do); bare 'raise' preserves the original
            # traceback, which Py2's 'raise e' would discard.
            log.exception(e)
            raise
def submit(data_file, id_tracking_file=node_tracking_file):
    """Submit records from data_file; track ids of newly created nodes.

    Records with an empty 'rand_subject_id' are not submitted; they are
    appended to '<data_file>_records_no_submit.csv' for reconciliation.

    Args:
        data_file: CSV file of records to submit.
        id_tracking_file: CSV file recording ids of submitted nodes.

    Raises:
        Exception: re-raised after logging when any record fails.
    """
    log.info('Starting submission of %ss.', node_type)
    nodes = []
    for record in load_data(data_file):
        # Skip records with a missing subject id (e.g. unknown jaxid or
        # missing visit info); they go to the side CSV below.
        if len(record['rand_subject_id']) > 0:
            log.debug('\n...next record...')
            try:
                log.debug('data record: '+str(record))
                # Node-Specific Variables:
                load_search_field = 'rand_subject_id'
                internal_id = record['rand_subject_id']
                parent_internal_id = record['rand_subject_id']
                # The parent OSDF id is carried directly on the record
                # here, rather than looked up from the tracking file.
                parent_id = record['parent_osdf_id']
                log.debug('matched parent_id: %s', parent_id)
                if parent_id:
                    node_is_new = False  # set to True if newbie
                    node = load(internal_id, load_search_field)
                    if not getattr(node, load_search_field):
                        log.debug('loaded node newbie...')
                        node_is_new = True
                    # BUGFIX: removed two live 'pdb.set_trace()' debugger
                    # breakpoints that would halt any production run.
                    saved = validate_record(parent_id, node, record,
                                            data_file_name=data_file)
                    if saved:
                        header = settings.node_id_tracking.id_fields
                        saved_name = getattr(saved, load_search_field)
                        vals = values_to_node_dict(
                            [[node_type.lower(), saved_name, saved.id,
                              parent_type.lower(), parent_internal_id,
                              parent_id, get_cur_datetime()]],
                            header
                        )
                        nodes.append(vals)
                        # Only newly created nodes get a tracking row.
                        if node_is_new:
                            write_out_csv(
                                id_tracking_file,
                                fieldnames=get_field_header(id_tracking_file),
                                values=vals)
                else:
                    log.error('No parent_id found for %s', parent_internal_id)
            except Exception as e:
                log.exception(e)
                raise
        else:
            write_out_csv(data_file+'_records_no_submit.csv',
                          fieldnames=record.keys(), values=[record,])
def submit(data_file, id_tracking_file=node_tracking_file):
    """Submit visit records from data_file; track ids of new nodes.

    Args:
        data_file: CSV file of visit records to submit.
        id_tracking_file: CSV file recording ids of submitted nodes.

    Raises:
        Exception: re-raised after logging when any record fails.
    """
    log.info('Starting submission of %ss.', node_type)
    nodes = []
    csv_fieldnames = get_field_header(data_file)
    write_csv_headers(data_file, fieldnames=csv_fieldnames)
    for record in load_data(data_file):
        log.info('\n...next record...')
        try:
            log.debug('data record: '+str(record))
            # node-specific variables:
            load_search_field = 'visit_id'
            internal_id = record['DCC_VISIT_IDS']
            # Text id used to find the parent and get back its OSDF id.
            parent_internal_id = record['rand_patient_id']
            parent_id = get_parent_node_id(
                id_tracking_file, parent_type, parent_internal_id)
            if parent_id:
                node_is_new = False  # set to True if newbie
                node = load(internal_id, load_search_field)
                if not getattr(node, load_search_field):
                    log.debug('loaded node newbie...')
                    node_is_new = True
                saved = validate_record(parent_id, node, record,
                                        data_file_name=data_file)
                if saved:
                    header = settings.node_id_tracking.id_fields
                    saved_name = getattr(saved, load_search_field)
                    vals = values_to_node_dict(
                        [[node_type.lower(), saved_name, saved.id,
                          parent_type.lower(), parent_internal_id,
                          parent_id, get_cur_datetime()]],
                        header
                    )
                    nodes.append(vals)
                    # Only newly created nodes get a tracking row.
                    if node_is_new:
                        write_out_csv(
                            id_tracking_file,
                            fieldnames=get_field_header(id_tracking_file),
                            values=vals)
            else:
                log.error('No parent_id found for %s', parent_internal_id)
        except Exception as e:
            log.exception(e)
            raise
def submit(data_file, id_tracking_file=node_tracking_file):
    """Submit file records from data_file; track ids of new nodes.

    Non-consented records are tracked under a privatized name instead of
    the real file name.

    Args:
        data_file: CSV file of file records to submit.
        id_tracking_file: CSV file recording ids of submitted nodes.

    Raises:
        Exception: re-raised after logging when any record fails.
    """
    log.info('Starting submission of %ss.', node_type)
    nodes = []
    csv_fieldnames = get_field_header(data_file)
    write_csv_headers(data_file, fieldnames=csv_fieldnames)
    for record in load_data(data_file):
        log.info('...next record...')
        try:
            log.debug('data record: '+str(record))
            # node-specific variables:
            load_search_field = 'local_file'
            internal_id = os.path.basename(record[load_search_field])
            parent_internal_id = record['prep_id']
            parent_id = get_parent_node_id(
                id_tracking_file, parent_type, parent_internal_id)
            log.debug('matched parent_id: %s', parent_id)
            if parent_id:
                node_is_new = False  # set to True if newbie
                node = load(internal_id, load_search_field)
                if not getattr(node, load_search_field):
                    log.debug('loaded node newbie...')
                    node_is_new = True
                saved = validate_record(parent_id, node, record,
                                        data_file_name=data_file)
                if saved:
                    header = settings.node_id_tracking.id_fields
                    # Privatize the tracked name for non-consented files.
                    if record['consented'] == 'YES':
                        saved_name = os.path.basename(
                            getattr(saved, load_search_field))
                    else:
                        saved_name = '-'.join(
                            [getattr(saved, 'comment'), 'private_file'])
                    vals = values_to_node_dict(
                        [[node_type.lower(), saved_name, saved.id,
                          parent_type.lower(), parent_internal_id,
                          parent_id, get_cur_datetime()]],
                        header
                    )
                    nodes.append(vals)
                    # Only newly created nodes get a tracking row.
                    if node_is_new:
                        write_out_csv(
                            id_tracking_file,
                            fieldnames=get_field_header(id_tracking_file),
                            values=vals)
            else:
                log.error('No parent_id found for %s', parent_internal_id)
        except Exception as e:
            log.exception(e)
            raise
def update_nodes(session, data_file, node_type):
    """Update each node found in data_file via update_node().

    Args:
        session: open OSDF session used for the updates.
        data_file: CSV file of records, one per node to update.
        node_type: node type name, used for logging and dispatch.

    Raises:
        Exception: re-raised after logging when any record fails.
    """
    log.info('Starting updates of %ss.', node_type)
    # BUGFIX: dropped unused local 'data_file_log' (computed, never used).
    for record in load_data(data_file):
        try:
            update_node(session, record, node_type)
        except Exception as e:
            # Bare 'raise' preserves the original traceback, which
            # Py2's 'raise e' would discard.
            log.exception(e)
            raise
def submit(data_file, id_tracking_file=node_tracking_file):
    """Submit host transcriptomics records; track ids of new nodes.

    Args:
        data_file: CSV file of records to submit.
        id_tracking_file: CSV file recording ids of submitted nodes.

    Raises:
        Exception: re-raised after logging when any record fails.
    """
    log.info('Starting submission of %ss.', node_type)
    nodes = []
    csv_fieldnames = get_field_header(data_file)
    write_csv_headers(data_file, fieldnames=csv_fieldnames)
    for record in load_data(data_file):
        log.info('...next record...')
        try:
            log.debug('data record: '+str(record))
            # node-specific variables:
            load_search_field = 'comment'
            internal_id = str(record['host_transcriptomics_id']) + '.host_transcriptomics'
            # Text id linking to the parent host_seq_prep node.
            parent_internal_id = record['host_seq_prep_name_id']
            parent_id = get_parent_node_id(
                id_tracking_file, parent_type, parent_internal_id)
            log.debug('matched parent_id: %s', parent_id)
            if parent_id:
                node_is_new = False  # set to True if newbie
                node = load(internal_id, load_search_field)
                if not getattr(node, load_search_field):
                    log.debug('loaded node newbie...')
                    node_is_new = True
                # BUGFIX: removed live 'pdb.set_trace()' debugger
                # breakpoint that would halt any production run.
                saved = validate_record(parent_id, node, record,
                                        data_file_name=data_file)
                if saved:
                    header = settings.node_id_tracking.id_fields
                    saved_name = getattr(saved, load_search_field)
                    # NOTE(review): unlike the subject/visit/file submit()
                    # variants, this row omits get_cur_datetime(); confirm
                    # 'header' here really has six fields, otherwise the
                    # timestamp column should be appended.
                    vals = values_to_node_dict(
                        [[node_type.lower(), saved_name, saved.id,
                          parent_type.lower(), parent_internal_id,
                          parent_id]],
                        header
                    )
                    nodes.append(vals)
                    # Only newly created nodes get a tracking row.
                    if node_is_new:
                        write_out_csv(
                            id_tracking_file,
                            fieldnames=get_field_header(id_tracking_file),
                            values=vals)
            else:
                log.error('No parent_id found for %s', parent_internal_id)
        except Exception as e:
            log.exception(e)
            raise
def submit(data_file, id_tracking_file=node_tracking_file):
    """Submit proteome records; track ids of new nodes.

    Args:
        data_file: CSV file of records to submit.
        id_tracking_file: CSV file recording ids of submitted nodes.

    Raises:
        Exception: re-raised after logging when any record fails.
    """
    log.info('Starting submission of %ss.', node_type)
    nodes = []
    csv_fieldnames = get_field_header(data_file)
    write_csv_headers(data_file, fieldnames=csv_fieldnames)
    for record in load_data(data_file):
        log.info('\n...next record...')
        try:
            log.debug('data record: '+str(record))
            # node-specific variables:
            load_search_field = 'comment'
            internal_id = record['sample_name_id'] + '.proteome'
            parent_internal_id = record['sample_name_id'] + '.hostassayprep'
            parent_id = get_parent_node_id(
                id_tracking_file, parent_type, parent_internal_id)
            # CONSISTENCY FIX: guard on a missing parent like every other
            # submit() variant, instead of validating with parent_id=None.
            if parent_id:
                node_is_new = False  # set to True if newbie
                node = load(internal_id, load_search_field)
                if not getattr(node, load_search_field):
                    log.debug('loaded node newbie...')
                    node_is_new = True
                # BUGFIX: removed live 'pdb.set_trace()' debugger
                # breakpoint that would halt any production run.
                saved = validate_record(parent_id, node, record,
                                        data_file_name=data_file)
                if saved:
                    header = settings.node_id_tracking.id_fields
                    saved_name = getattr(saved, load_search_field)
                    # NOTE(review): this row omits get_cur_datetime();
                    # confirm 'header' here really has six fields.
                    vals = values_to_node_dict(
                        [[node_type.lower(), saved_name, saved.id,
                          parent_type.lower(), parent_internal_id,
                          parent_id]],
                        header
                    )
                    nodes.append(vals)
                    # Only newly created nodes get a tracking row.
                    if node_is_new:
                        write_out_csv(
                            id_tracking_file,
                            fieldnames=get_field_header(id_tracking_file),
                            values=vals)
            else:
                log.error('No parent_id found for %s', parent_internal_id)
        except Exception as e:
            log.exception(e)
            raise
def submit(data_file, id_tracking_file=node_tracking_file):
    """Submit derived-file records; track ids of new nodes.

    Records with an empty 'local_file' field are skipped.

    Args:
        data_file: CSV file of records to submit.
        id_tracking_file: CSV file recording ids of submitted nodes.

    Raises:
        Exception: re-raised after logging when any record fails.
    """
    log.info('Starting submission of %ss.', node_type)
    nodes = []
    csv_fieldnames = get_field_header(data_file)
    write_csv_headers(data_file, fieldnames=csv_fieldnames)
    for record in load_data(data_file):
        log.info('\n...next record...')
        try:
            log.debug('data record: '+str(record))
            # Skip records that name no local file to submit.
            if record['local_file'] != '':
                load_search_field = 'local_file'
                internal_id = os.path.basename(record['local_file'])
                parent_internal_id = record['raw_file_id']
                parent_id = get_parent_node_id(
                    id_tracking_file, parent_type, parent_internal_id)
                # CONSISTENCY FIX: guard on a missing parent like the
                # other submit() variants do.
                if parent_id:
                    node_is_new = False  # set to True if newbie
                    node = load(internal_id, load_search_field)
                    if not getattr(node, load_search_field):
                        log.debug('loaded node newbie...')
                        node_is_new = True
                    saved = validate_record(parent_id, node, record,
                                            data_file_name=data_file)
                    if saved:
                        header = settings.node_id_tracking.id_fields
                        # BUGFIX: 'saved_name' was referenced but never
                        # assigned (guaranteed NameError on any saved
                        # record); derive it from the loaded search field
                        # the same way the sibling file-node submit() does.
                        saved_name = os.path.basename(
                            getattr(saved, load_search_field))
                        # NOTE(review): this row omits get_cur_datetime();
                        # confirm 'header' here really has six fields.
                        vals = values_to_node_dict(
                            [[node_type.lower(), saved_name, saved.id,
                              parent_type.lower(), parent_internal_id,
                              parent_id]],
                            header
                        )
                        nodes.append(vals)
                        # Only newly created nodes get a tracking row.
                        if node_is_new:
                            write_out_csv(
                                id_tracking_file,
                                fieldnames=get_field_header(id_tracking_file),
                                values=vals)
                else:
                    log.error('No parent_id found for %s',
                              parent_internal_id)
        except Exception as e:
            log.exception(e)
            raise