def validate_record(parent_id, node, record, data_file_name=node_type):
    """Fill in ``node`` from ``record``, validate it and try to save it.

    The outcome is recorded by appending ``record`` to one of three CSV
    files whose names start with ``data_file_name``:
    ``_invalid_records.csv``, ``_submitted.csv`` or
    ``_unsaved_records.csv``.

    Args:
        parent_id: ID stored in ``node.links`` under ``'prepared_from'``.
        node: object to populate; must provide ``is_valid()``,
            ``validate()`` and ``save()``.
        record: mapping providing the keys ``sample_name_id``,
            ``visit_id``, ``rand_subject_id``, ``prep_id``,
            ``lib_layout`` and ``lib_selection``.
        data_file_name: name handed to ``get_field_header`` and
            ``write_csv_headers`` and used as prefix of the result CSVs.

    Returns:
        ``node`` when ``node.save()`` succeeded; ``False`` when the node
        is invalid or could not be saved.
    """
    log.info("in validate/save: "+node_type)
    csv_fieldnames = get_field_header(data_file_name)
    write_csv_headers(data_file_name, fieldnames=csv_fieldnames)

    # The same derived name is used as comment, local file and tag value.
    prep_file_name = str(record['sample_name_id']) + '.hostseqprep'

    node.study = 'prediabetes'
    node.comment = prep_file_name
    node.prepared_by = 'Varsha Rao and Reza Sailani'
    node.sequencing_contact = 'Varsha Rao and Reza Sailani'
    node.sequencing_center = 'Stanford University'
    node.format = 'fastq'
    node.format_doc = 'https://en.wikipedia.org/wiki/' + str(node.format)
    node.exp_length = 0  # fixed value; record['exp_length'] is not used
    node.local_file = prep_file_name
    node.storage_duration = 1
    # checksums and size are not set by this function.
    node.tags = list_tags(
        node.tags,
        # NOTE(review): 'sample name' is filled from visit_id, exactly
        # like 'visit id' below -- confirm this is intended.
        'sample name: ' + record['visit_id'],
        'visit id: ' + record['visit_id'],
        'subject id: ' + record['rand_subject_id'],
        'file prefix: ' + record['prep_id'],
        'file name: ' + prep_file_name,
    )
    node.lib_layout = record['lib_layout']
    node.lib_selection = record['lib_selection']
    node.ncbi_taxon_id = '9606'
    node.prep_id = record['prep_id']

    parent_link = {'prepared_from': [parent_id]}
    log.debug('parent_id: '+str(parent_link))
    node.links = parent_link

    if not node.is_valid():
        write_out_csv(data_file_name+'_invalid_records.csv',
                      fieldnames=csv_fieldnames, values=[record])
        invalidities = node.validate()
        err_str = "Invalid {}!\n\t{}".format(node_type, str(invalidities))
        log.error(err_str)
        # Explicit False (previously an implicit None) so that every
        # failure path returns the same value.
        return False

    if node.save():
        write_out_csv(data_file_name+'_submitted.csv',
                      fieldnames=csv_fieldnames, values=[record])
        return node

    write_out_csv(data_file_name+'_unsaved_records.csv',
                  fieldnames=csv_fieldnames, values=[record])
    return False
def validate_record(parent_id, node, record, data_file_name=node_type):
    """Fill in ``node`` from ``record``, validate it and try to save it.

    The outcome is recorded by appending ``record`` to one of three CSV
    files whose names start with ``data_file_name``:
    ``_invalid_records.csv``, ``_submitted.csv`` or
    ``_unsaved_records.csv``.

    Args:
        parent_id: ID stored in ``node.links`` under ``'sequenced_from'``.
        node: object to populate; must provide ``is_valid()``,
            ``validate()`` and ``save()``.
        record: mapping providing the keys ``local_file``, ``seq_model``,
            ``consented``, ``jaxid_sample``, ``sample_name_id``,
            ``body_site``, ``rand_subject_id`` and ``prep_id``; ``md5``,
            ``sha256`` and ``size`` are read only when ``consented`` is
            ``'YES'``.
        data_file_name: name handed to ``get_field_header`` and
            ``write_csv_headers`` and used as prefix of the result CSVs.

    Returns:
        ``node`` when ``node.save()`` succeeded; ``False`` when the node
        is invalid or could not be saved.
    """
    log.info("in validate/save: "+node_type)
    csv_fieldnames = get_field_header(data_file_name)
    write_csv_headers(data_file_name, fieldnames=csv_fieldnames)

    local_file_name = os.path.basename(record['local_file'])
    node.comment = local_file_name
    node.study = 'prediabetes'
    node.sequence_type = 'nucleotide'
    node.seq_model = record['seq_model']
    node.format = 'fastq'
    node.format_doc = 'https://en.wikipedia.org/wiki/FASTQ_format'
    node.exp_length = 0  # fixed value; record['exp_length'] is not used
    if record['consented'] == 'YES':
        node.local_file = record['local_file']
        node.checksums = {'md5': record['md5'], 'sha256': record['sha256']}
        node.size = int(record['size'])
    else:
        # Not marked as consented: flag the files as private and store an
        # all-zero md5 and a zero size instead of the real file details.
        node.private_files = True
        node.checksums = {'md5': '00000000000000000000000000000000'}
        node.size = 0
    node.tags = list_tags(
        'sequence type: ' + 'RNAseq',
        'jaxid (sample): ' + record['jaxid_sample'],
        'sample name: ' + record['sample_name_id'],
        'body site: ' + record['body_site'],
        'subject id: ' + record['rand_subject_id'],
        'study: ' + 'prediabetes',
        'prep_id:' + record['prep_id'],
        'file name: ' + local_file_name,
    )
    parent_link = {'sequenced_from': [parent_id]}
    log.debug('parent_id: '+str(parent_link))
    node.links = parent_link

    if not node.is_valid():
        write_out_csv(data_file_name+'_invalid_records.csv',
                      fieldnames=csv_fieldnames, values=[record])
        invalidities = node.validate()
        err_str = "Invalid {}!\n\t{}".format(node_type, str(invalidities))
        log.error(err_str)
        # Explicit False (previously an implicit None) so that every
        # failure path returns the same value.
        return False

    if node.save():
        write_out_csv(data_file_name+'_submitted.csv',
                      fieldnames=csv_fieldnames, values=[record])
        return node

    write_out_csv(data_file_name+'_unsaved_records.csv',
                  fieldnames=csv_fieldnames, values=[record])
    return False
def validate_record(parent_id, node, record, data_file_name=node_type):
    """Fill in ``node`` from ``record``, validate it and try to save it.

    The outcome is recorded by appending ``record`` to one of three CSV
    files whose names start with ``data_file_name``:
    ``_invalid_records.csv``, ``_submitted.csv`` or
    ``_unsaved_records.csv``.

    Args:
        parent_id: ID stored in ``node.links`` under ``'prepared_from'``.
        node: object to populate; must provide ``is_valid()``,
            ``validate()`` and ``save()``.
        record: mapping providing the keys ``prep_id``, ``body_site``,
            ``jaxid_sample``, ``jaxid_library``, ``visit_id`` and
            ``rand_subject_id``; it is also passed to
            ``generate_mimarks``.
        data_file_name: name handed to ``get_field_header`` and
            ``write_csv_headers`` and used as prefix of the result CSVs.

    Returns:
        ``node`` when ``node.save()`` succeeded; ``False`` when the node
        is invalid or could not be saved.
    """
    log.info("in validate/save: "+node_type)
    csv_fieldnames = get_field_header(data_file_name)
    write_csv_headers(data_file_name, fieldnames=csv_fieldnames)

    node.comment = record['prep_id']
    node.frag_size = 301  # goal size
    node.lib_layout = 'paired 301bp'
    node.lib_selection = ''
    node.mimarks = generate_mimarks(record)
    # Taxon id depends on the body site:
    # ST: http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=408170
    # NS: http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=1131769
    if 'stool' == record['body_site']:
        node.ncbi_taxon_id = '408170'
    else:
        node.ncbi_taxon_id = '1131769'  # nasal
    node.prep_id = record['prep_id']
    node.sequencing_center = 'Jackson Laboratory for Genomic Medicine'
    node.sequencing_contact = 'George Weinstock'
    node.storage_duration = 2112
    node.tags = list_tags(
        node.tags,
        'jaxid (sample): ' + record['jaxid_sample'],
        # An empty library id is reported as 'unknown'.
        ('jaxid (library): ' + record['jaxid_library']
         if record['jaxid_library']
         else 'jaxid (library): unknown'),
        'visit id: ' + record['visit_id'],
        'subject id: ' + record['rand_subject_id'],
        'study: prediabetes',
        'file prefix: ' + record['prep_id'],
    )
    parent_link = {'prepared_from': [parent_id]}
    log.debug('parent_id: '+str(parent_link))
    node.links = parent_link

    if not node.is_valid():
        write_out_csv(data_file_name+'_invalid_records.csv',
                      fieldnames=csv_fieldnames, values=[record])
        invalidities = node.validate()
        err_str = "Invalid {}!\n\t{}".format(node_type, str(invalidities))
        log.error(err_str)
        # Explicit False (previously an implicit None) so that every
        # failure path returns the same value.
        return False

    if node.save():
        write_out_csv(data_file_name+'_submitted.csv',
                      fieldnames=csv_fieldnames, values=[record])
        return node

    write_out_csv(data_file_name+'_unsaved_records.csv',
                  fieldnames=csv_fieldnames, values=[record])
    return False
예제 #4
0
def validate_record(parent_id, node, record, data_file_name=node_type):
    """Populate ``node`` from ``record``, then validate and save it.

    Whatever the outcome, ``record`` is appended to a CSV whose name is
    ``data_file_name`` plus ``_invalid_records.csv``, ``_submitted.csv``
    or ``_unsaved_records.csv``.

    Returns the node when it was saved and ``False`` otherwise.
    """
    header = get_field_header(data_file_name)
    write_csv_headers(data_file_name, fieldnames=header)

    base_name = os.path.basename(record['local_file_clean'])
    node.study = 'prediabetes'
    node.comment = base_name
    node.format = record['format']  # only 'fasta', 'fastq' allowed!
    node.format_doc = ''.join(('https://en.wikipedia.org/wiki/',
                               record['format'].upper(),
                               '_format'))

    if record['consented'] != 'YES':
        # Without consent no real file details are attached.
        node.private_files = True
        node.checksums = {'md5': '00000000000000000000000000000000'}
        node.size = 1
    else:
        node.local_file = record['local_file_clean']
        node.checksums = {
            'md5': record['clean_md5'],
            'sha256': record['clean_sha256'],
        }
        node.size = int(record['clean_size'])

    tag_values = [
        'study: prediabetes',
        'subject id: ' + record['rand_subject_id'],
        'sample name: ' + record['sample_name_id'],
        'body site: ' + record['body_site'],
        'prep_id:' + record['prep_id'],
        'raw_file_id: ' + record['raw_file_id'],
    ]
    node.tags = list_tags(*tag_values)

    log.debug('parent_id: ' + str(parent_id))
    node.links = {'computed_from': [parent_id]}

    # Decide the outcome first, then write the matching CSV once.
    if node.is_valid():
        if node.save():
            log.info('node saved: ' + str(node.comment))
            suffix, result = '_submitted.csv', node
        else:
            log.info('node NOT saved: ' + str(node.comment))
            suffix, result = '_unsaved_records.csv', False
    else:
        problems = str(node.validate())
        log.error("Invalid node {}!\t\t{}".format(node_type, problems))
        suffix, result = '_invalid_records.csv', False

    write_out_csv(data_file_name + suffix,
                  fieldnames=header, values=[record])
    return result
예제 #5
0
def validate_record(parent_id, node, record, data_file_name=node_type):
    """Fill in ``node`` from ``record``, validate it and try to save it.

    The outcome is recorded by appending ``record`` to one of three CSV
    files whose names start with ``data_file_name``:
    ``_invalid_records.csv``, ``_submitted.csv`` or
    ``_unsaved_records.csv``.

    Args:
        parent_id: ID stored in ``node.links`` under ``'sequenced_from'``.
        node: object to populate; must provide ``is_valid()``,
            ``validate()`` and ``save()``.
        record: mapping providing the keys ``local_uri``,
            ``sequencing_contact``, ``visit_id``, ``rand_subject_id``,
            ``prep_id``, ``lib_layout``, ``lib_selection`` and
            ``ncbi_taxon_id``.
        data_file_name: name handed to ``get_field_header`` and
            ``write_csv_headers`` and used as prefix of the result CSVs.

    Returns:
        ``node`` when ``node.save()`` succeeded; ``False`` when the node
        is invalid or could not be saved.
    """
    log.info("in validate/save: "+node_type)
    csv_fieldnames = get_field_header(data_file_name)
    write_csv_headers(data_file_name, fieldnames=csv_fieldnames)

    node.study = 'prediabetes'
    node.comment = record['local_uri']
    node.prepared_by = record['sequencing_contact']
    node.sequence_type = 'nucleotide'
    node.format = 'fastq'
    # NOTE(review): the URL has no article name -- confirm whether a
    # format-specific page was intended.
    node.format_doc = 'https://en.wikipedia.org/wiki/'
    node.exp_length = 0  # fixed value; record['exp_length'] is not used
    # NOTE(review): local_file is assigned a one-element list here --
    # confirm that the node accepts a list for this attribute.
    node.local_file = [record['local_uri']]
    # checksums and size are not set by this function.
    node.tags = list_tags(
        node.tags,
        # NOTE(review): 'sample name' is filled from visit_id, exactly
        # like 'visit id' below -- confirm this is intended.
        'sample name: ' + record['visit_id'],
        'visit id: ' + record['visit_id'],
        'subject id: ' + record['rand_subject_id'],
        'study: prediabetes',
        'file prefix: ' + record['prep_id'],
        'file name: ' + record['local_uri'],
    )
    node.lib_layout = record['lib_layout']
    node.lib_selection = record['lib_selection']
    node.ncbi_taxon_id = record['ncbi_taxon_id']
    node.prep_id = record['prep_id']

    parent_link = {'sequenced_from': [parent_id]}
    log.debug('parent_id: '+str(parent_link))
    node.links = parent_link

    if not node.is_valid():
        write_out_csv(data_file_name+'_invalid_records.csv',
                      fieldnames=csv_fieldnames, values=[record])
        invalidities = node.validate()
        err_str = "Invalid {}!\n\t{}".format(node_type, str(invalidities))
        log.error(err_str)
        # Explicit False (previously an implicit None) so that every
        # failure path returns the same value.
        return False

    if node.save():
        write_out_csv(data_file_name+'_submitted.csv',
                      fieldnames=csv_fieldnames, values=[record])
        return node

    write_out_csv(data_file_name+'_unsaved_records.csv',
                  fieldnames=csv_fieldnames, values=[record])
    return False
def validate_record(parent_id, node, record, data_file_name=node_type):
    """Fill in ``node`` from ``record``, validate it and try to save it.

    The outcome is recorded by appending ``record`` to one of three CSV
    files whose names start with ``data_file_name``:
    ``_invalid_records.csv``, ``_submitted.csv`` or
    ``_unsaved_records.csv``.

    Args:
        parent_id: ID stored in ``node.links`` under ``'computed_from'``.
        node: object to populate; must provide ``is_valid()``,
            ``validate()`` and ``save()``.
        record: mapping providing the keys ``prep_id``, ``format``,
            ``local_file``, ``size``, ``md5``, ``sha256``,
            ``jaxid_sample``, ``jaxid_library``, ``sample_name_id``,
            ``body_site``, ``visit_id``, ``rand_subject_id`` and
            ``raw_file_id``.
        data_file_name: name handed to ``get_field_header`` and
            ``write_csv_headers`` and used as prefix of the result CSVs.

    Returns:
        ``node`` when ``node.save()`` succeeded; ``False`` when the node
        is invalid or could not be saved.
    """
    csv_fieldnames = get_field_header(data_file_name)
    write_csv_headers(data_file_name, fieldnames=csv_fieldnames)

    node.study = 'prediabetes'
    node.comment = (record['prep_id'] + ' ... Quality trimmed, cleaned, '
                    + 'dehosted, converted fastq to fasta.')
    node.format = record['format']  # only 'fasta', 'fastq' allowed!
    node.format_doc = ('https://en.wikipedia.org/wiki/'
                       + record['format'].upper() + '_format')
    node.local_file = record['local_file']
    node.size = int(record['size'])
    node.checksums = {'md5': record['md5'], 'sha256': record['sha256']}
    node.tags = list_tags(
        node.tags,
        'jaxid (sample): ' + record['jaxid_sample'],
        # An empty library id is reported as 'none'.
        ('jaxid (library): ' + record['jaxid_library']
         if record['jaxid_library']
         else 'jaxid (library): none'),
        'sample name: ' + record['sample_name_id'],
        'body site: ' + record['body_site'],
        'visit id: ' + record['visit_id'],
        'subject id: ' + record['rand_subject_id'],
        'study: prediabetes',
        'dna_prep_id: ' + record['prep_id'],
        'raw_file_id: ' + record['raw_file_id'],
    )

    log.debug('parent_id: '+str(parent_id))
    node.links = {'computed_from': [parent_id]}

    if not node.is_valid():
        write_out_csv(data_file_name+'_invalid_records.csv',
                      fieldnames=csv_fieldnames, values=[record])
        invalidities = node.validate()
        err_str = "Invalid {}!\n\t{}".format(node_type, str(invalidities))
        log.error(err_str)
        # Explicit False (previously an implicit None) so that every
        # failure path returns the same value.
        return False

    if node.save():
        write_out_csv(data_file_name+'_submitted.csv',
                      fieldnames=csv_fieldnames, values=[record])
        return node

    write_out_csv(data_file_name+'_unsaved_records.csv',
                  fieldnames=csv_fieldnames, values=[record])
    return False
def validate_record(parent_id, node, record, data_file_name=node_type):
    """Fill in ``node`` from ``record``, validate it and try to save it.

    The outcome is recorded by appending ``record`` to one of three CSV
    files whose names start with ``data_file_name``:
    ``_invalid_records.csv``, ``_submitted.csv`` or
    ``_unsaved_records.csv``.

    Args:
        parent_id: ID stored in ``node.links`` under ``'sequenced_from'``.
        node: object to populate; must provide ``is_valid()``,
            ``validate()`` and ``save()``.
        record: mapping providing the keys ``local_file``, ``seq_model``,
            ``MD5SUM``, ``SHA256``, ``SIZE``, ``visit_id``,
            ``body_site``, ``rand_subject_id``, ``sample_name_id`` and
            ``subtype``.
        data_file_name: name handed to ``get_field_header`` and
            ``write_csv_headers`` and used as prefix of the result CSVs.

    Returns:
        ``node`` when ``node.save()`` succeeded; ``False`` when the node
        is invalid or could not be saved.
    """
    log.info("in validate/save: "+node_type)
    csv_fieldnames = get_field_header(data_file_name)
    write_csv_headers(data_file_name, fieldnames=csv_fieldnames)

    node.study = 'prediabetes'
    node.comment = record['local_file']
    node.sequence_type = 'nucleotide'
    node.seq_model = record['seq_model']
    node.format = 'fastq'
    node.format_doc = 'https://en.wikipedia.org/wiki/FASTQ_format'
    node.exp_length = 0  # fixed value; record['exp_length'] is not used
    node.local_file = record['local_file']
    node.checksums = {'md5': record['MD5SUM'], 'sha256': record['SHA256']}
    node.size = int(record['SIZE'])
    node.tags = list_tags(
        node.tags,
        # NOTE(review): 'sample name' is filled from visit_id, exactly
        # like 'visit id' below -- confirm this is intended.
        'sample name: ' + record['visit_id'],
        'body site: ' + record['body_site'],
        'visit id: ' + record['visit_id'],
        'subject id: ' + record['rand_subject_id'],
        'file prefix: ' + record['sample_name_id'] + '.hostseqprep',
        'file name: ' + record['local_file'],
        'sub-group: ' + record['subtype'],
    )
    parent_link = {'sequenced_from': [parent_id]}
    log.debug('parent_id: '+str(parent_link))
    node.links = parent_link

    if not node.is_valid():
        write_out_csv(data_file_name+'_invalid_records.csv',
                      fieldnames=csv_fieldnames, values=[record])
        invalidities = node.validate()
        err_str = "Invalid {}!\n\t{}".format(node_type, str(invalidities))
        log.error(err_str)
        # Explicit False (previously an implicit None) so that every
        # failure path returns the same value.
        return False

    if node.save():
        write_out_csv(data_file_name+'_submitted.csv',
                      fieldnames=csv_fieldnames, values=[record])
        return node

    write_out_csv(data_file_name+'_unsaved_records.csv',
                  fieldnames=csv_fieldnames, values=[record])
    return False
예제 #8
0
def validate_record(parent_id, node, record, data_file_name=node_type):
    """Fill in ``node`` from ``record``, validate it and try to save it.

    The outcome is recorded by writing a header and ``record`` to one of
    three CSV files. Their names are ``data_file_name`` without its last
    four characters plus ``.invalid_records.csv``, ``.submitted.csv`` or
    ``.unsaved.csv``.

    Args:
        parent_id: ID stored in ``node.links`` under
            ``'collected_during'``.
        node: object to populate; must provide ``is_valid()``,
            ``validate()`` and ``save()``.
        record: mapping providing the keys ``sample_name_id``,
            ``body_site``, ``fma_body_site``, ``visit_id``,
            ``rand_subject_id`` and ``Group``; it is also passed to
            ``generate_mixs``.
        data_file_name: name handed to ``get_field_header``; presumably
            ends in a four-character extension such as ``.csv`` (verify
            against the caller).

    Returns:
        ``node`` when ``node.save()`` succeeded; ``False`` when the node
        is invalid or could not be saved.
    """
    log.info("in validate/save: "+node_type)
    csv_fieldnames = get_field_header(data_file_name)

    node.name = record['sample_name_id']
    node.body_site = record['body_site'].lower()
    node.fma_body_site = record['fma_body_site']
    node.mixs = generate_mixs(record)
    node.tags = list_tags(
        node.tags,
        'sample id: ' + record['sample_name_id'],
        'visit id: ' + record['visit_id'],
        'subject id: ' + record['rand_subject_id'],
        'study: prediabetes',
        'substudy: ' + record['Group'],
    )

    parent_link = {'collected_during': [parent_id]}
    log.debug('parent_id: '+str(parent_link))
    node.links = parent_link

    if not node.is_valid():
        invalids = data_file_name[:-4]+'.invalid_records.csv'
        write_csv_headers(invalids, fieldnames=csv_fieldnames)
        write_out_csv(invalids, fieldnames=csv_fieldnames,
                      values=[record])
        invalidities = node.validate()
        err_str = "Invalid {}!\n\t{}".format(node_type, str(invalidities))
        log.error(err_str)
        # Explicit False (previously an implicit None) so that every
        # failure path returns the same value.
        return False

    if node.save():
        submitted = data_file_name[:-4]+'.submitted.csv'
        write_csv_headers(submitted, fieldnames=csv_fieldnames)
        write_out_csv(submitted, fieldnames=csv_fieldnames,
                      values=[record])
        return node

    unsaved = data_file_name[:-4]+'.unsaved.csv'
    write_csv_headers(unsaved, fieldnames=csv_fieldnames)
    write_out_csv(unsaved, fieldnames=csv_fieldnames,
                  values=[record])
    return False
def validate_record(parent_id, node, record, data_file_name=node_type):
    """Fill in ``node`` from ``record``, validate it and try to save it.

    The outcome is recorded by appending ``record`` to one of three CSV
    files whose names start with ``data_file_name``:
    ``_invalid_records.csv``, ``_submitted.csv`` or
    ``_unsaved_records.csv``.

    Args:
        parent_id: ID stored in ``node.links`` under ``'derived_from'``.
        node: object to populate; must provide ``is_valid()``,
            ``validate()`` and ``save()``.
        record: mapping providing the keys ``sample_name_id``,
            ``format``, ``md5``, ``sha256``, ``local_file``,
            ``visit_id``, ``rand_subject_id``, ``SAMPLE_FLUID_TYPE``,
            ``Type``, ``BATCH`` and ``MODE``.
        data_file_name: name handed to ``get_field_header`` and
            ``write_csv_headers`` and used as prefix of the result CSVs.

    Returns:
        ``node`` when ``node.save()`` succeeded; ``False`` when the node
        is invalid or could not be saved.
    """
    csv_fieldnames = get_field_header(data_file_name)
    write_csv_headers(data_file_name, fieldnames=csv_fieldnames)

    # The same derived name is used as comment and as a tag value.
    metabolome_name = record['sample_name_id'] + ".metabolome"

    node.study = 'prediabetes'
    node.comment = metabolome_name
    node.format = record['format']  # TODO: fix to handle mzXML files
    node.format_doc = 'https://en.wikipedia.org/wiki/Mass_spectrometry_data_format'
    node.subtype = 'host'
    node.checksums = {'md5': record['md5'], 'sha256': record['sha256']}
    node.local_file = record['local_file']
    node.tags = list_tags(
        node.tags,
        'sample name: ' + metabolome_name,
        'visit id: ' + record['visit_id'],
        'subject id: ' + record['rand_subject_id'],
        'sample fluid type: ' + record['SAMPLE_FLUID_TYPE'],
        'type: ' + record['Type'],
        'batch: ' + record['BATCH'],
        'mode: ' + record['MODE'],
    )

    log.debug('parent_id: '+str(parent_id))
    node.links = {'derived_from': [parent_id]}

    if not node.is_valid():
        write_out_csv(data_file_name+'_invalid_records.csv',
                      fieldnames=csv_fieldnames, values=[record])
        invalidities = node.validate()
        err_str = "Invalid {}!\n\t{}".format(node_type, str(invalidities))
        log.error(err_str)
        # Explicit False (previously an implicit None) so that every
        # failure path returns the same value.
        return False

    if node.save():
        write_out_csv(data_file_name+'_submitted.csv',
                      fieldnames=csv_fieldnames, values=[record])
        return node

    write_out_csv(data_file_name+'_unsaved_records.csv',
                  fieldnames=csv_fieldnames, values=[record])
    return False
예제 #10
0
def validate_record(parent_id, node, record, data_file_name=node_type):
    """Fill in ``node`` from ``record``, validate it and try to save it.

    The outcome is recorded by writing a header and ``record`` to one of
    three CSV files. Their names are ``data_file_name`` without its last
    four characters plus ``.invalid_records.csv``, ``.submitted.csv`` or
    ``.unsaved.csv``.

    Args:
        parent_id: ID stored in ``node.links`` under
            ``'associated_with'``.
        node: object to populate; must provide ``is_valid()``,
            ``validate()`` and ``save()``.
        record: mapping providing the keys ``race_code``, ``age`` and
            ``gender``.
        data_file_name: name handed to ``get_field_header``; presumably
            ends in a four-character extension such as ``.csv`` (verify
            against the caller).

    Returns:
        ``node`` when ``node.save()`` succeeded; ``False`` when the node
        is invalid or could not be saved.
    """
    log.info("in validate/save: "+node_type)
    csv_fieldnames = get_field_header(data_file_name)
    node.study = 'prediabetes'
    node.subtype = 'prediabetes'
    node.tags = list_tags(
        'Race: ' + get_race(record['race_code']),
        'age: ' + record['age'],
        'gender: ' + get_gender(record['gender']),
    )

    parent_link = {'associated_with': [parent_id]}
    log.debug('parent_id: '+str(parent_link))
    node.links = parent_link

    if not node.is_valid():
        invalids = data_file_name[:-4]+'.invalid_records.csv'
        write_csv_headers(invalids, fieldnames=csv_fieldnames)
        write_out_csv(invalids, fieldnames=csv_fieldnames,
                      values=[record])
        invalidities = node.validate()
        err_str = "Invalid {}!\n\t{}".format(node_type, str(invalidities))
        log.error(err_str)
        # Explicit False (previously an implicit None) so that every
        # failure path returns the same value.
        return False

    if node.save():
        submitted = data_file_name[:-4]+'.submitted.csv'
        write_csv_headers(submitted, fieldnames=csv_fieldnames)
        write_out_csv(submitted, fieldnames=csv_fieldnames,
                      values=[record])
        return node

    unsaved = data_file_name[:-4]+'.unsaved.csv'
    write_csv_headers(unsaved, fieldnames=csv_fieldnames)
    write_out_csv(unsaved, fieldnames=csv_fieldnames,
                  values=[record])
    return False
예제 #11
0
def validate_record(parent_id, node, record, data_file_name=node_type):
    """Fill in ``node`` from ``record``, validate it and try to save it.

    The outcome is recorded by appending ``record`` to one of three CSV
    files whose names start with ``data_file_name``:
    ``_invalid_records.csv``, ``_submitted.csv`` or
    ``_unsaved_records.csv``.

    Args:
        parent_id: ID stored in ``node.links`` under ``'by'``.
        node: object to populate; must provide ``is_valid()``,
            ``validate()`` and ``save()``.
        record: mapping providing the keys ``visit_id``,
            ``visit_number``, ``interval`` and ``rand_subject_id``;
            ``visit_number`` and ``interval`` must be convertible with
            ``int()``.
        data_file_name: name handed to ``get_field_header`` and
            ``write_csv_headers`` and used as prefix of the result CSVs.

    Returns:
        ``node`` when ``node.save()`` succeeded; ``False`` when the node
        is invalid or could not be saved.
    """
    log.info("in validate/save: "+node_type)
    csv_fieldnames = get_field_header(data_file_name)
    write_csv_headers(data_file_name, fieldnames=csv_fieldnames)

    node.visit_id = record['visit_id']
    node.visit_number = int(record['visit_number'])
    node.interval = int(record['interval'])
    node.tags = list_tags(
        node.tags,
        'rand_subject_id: ' + record['rand_subject_id'],
        'study: prediabetes',
    )
    log.debug('parent_id: '+str(parent_id))
    node.links = {'by': [parent_id]}

    if not node.is_valid():
        write_out_csv(data_file_name+'_invalid_records.csv',
                      fieldnames=csv_fieldnames, values=[record])
        invalidities = node.validate()
        err_str = "Invalid {}!\n\t{}".format(node_type, str(invalidities))
        log.error(err_str)
        # Explicit False (previously an implicit None) so that every
        # failure path returns the same value.
        return False

    if node.save():
        write_out_csv(data_file_name+'_submitted.csv',
                      fieldnames=csv_fieldnames, values=[record])
        return node

    write_out_csv(data_file_name+'_unsaved_records.csv',
                  fieldnames=csv_fieldnames, values=[record])
    return False
예제 #12
0
def validate_record(parent_id, node, record):
    """Fill in ``node`` from ``record``, validate it and try to save it.

    Args:
        parent_id: ID stored in ``node.links`` under
            ``'participates_in'``.
        node: object to populate; must provide ``is_valid()``,
            ``validate()`` and ``save()``.
        record: mapping providing the keys ``rand_subject_id``,
            ``gender``, ``race_code`` and ``age``.

    Returns:
        ``node`` when ``node.save()`` succeeded, ``False`` when it did
        not.

    Raises:
        Exception: when the populated node is not valid; the message
            lists what ``node.validate()`` returned, one entry per line.
    """
    log.debug("in validate/save: "+node_type)
    node.rand_subject_id = record['rand_subject_id']
    node.gender = get_gender(record['gender'])
    node.race = get_race(record['race_code'])
    node.tags = list_tags(
        # Parenthesised so that a missing age yields 'age: unk'; without
        # the parentheses the conditional applied to the whole
        # concatenation and produced a bare 'unk' tag.
        'age: ' + (record['age'] or 'unk'),
        'study: prediabetes',
    )
    node.links = {'participates_in': [parent_id]}

    if not node.is_valid():
        invalidities = node.validate()
        # str() guards against non-string entries in the validation result.
        err_str = "Invalid!\n{}".format(
            "\n".join(str(item) for item in invalidities))
        log.error(err_str)
        raise Exception(err_str)

    if node.save():
        return node
    return False
예제 #13
0
	NodeLoadFunc = 'load_visitattribute'

	return load_node(internal_id, search_field, NodeTypeName, NodeLoadFunc)


def validate_record(parent_id, node, record, data_file_name=node_type):
	"""update record fields
	   validate node
	   if valid, save, if not, return false
	"""
	log.info("in validate/save: "+node_type)
	csv_fieldnames = get_field_header(data_file_name)

	node.study = 'prediabetes'
    node.subtype = 'prediabetes'
    node.tags = list_tags('')

	parent_link = {'associated_with':[parent_id]}
	log.debug('parent_id: '+str(parent_link))
	node.links = parent_link

	if not node.is_valid():
		invalids = data_file_name[:-4]+'.invalid_records.csv'
		write_csv_headers(invalids, fieldnames=csv_fieldnames)
		write_out_csv(invalids, fieldnames=csv_fieldnames,
					  values=[record,])
		invalidities = node.validate()
		err_str = "Invalid {}!\n\t{}".format(node_type, str(invalidities))
		log.error(err_str)
		# raise Exception(err_str)
	elif node.save():