Example #1
0
def addDataset(label, feature_matrix, associations, method, source, description, comments, configfile, results_path, ds_date, disease, contact):
	"""Insert or replace one row of tcga.regulome_explorer_dataset.

	label          -- dataset label; also used to locate the result files and,
	                  when description is empty, to derive a description
	feature_matrix -- recorded in the input_files column (and in comments
	                  when comments is empty)
	associations   -- recorded alongside feature_matrix
	method, source, contact, ds_date, disease -- stored as given
	description    -- if "", built from the cancer-type tokens found in label,
	                  with " filtered" appended when label contains "mask"
	                  but not "nomask"
	comments       -- if "", defaults to the same text as input_files
	configfile     -- passed to db_util.getConfig to obtain the db config
	results_path   -- prefix that is concatenated directly with the result
	                  file names, so it must already end with a path separator

	max_logged_pvalue is read from the "max_logpv" key of
	<results_path>edges_out_<label>_meta.json when that file exists and holds
	more than one character; otherwise, or when the content is not valid
	JSON, it is -1.0. Any other failure while reading the key (for example a
	missing "max_logpv" key) is printed and re-raised.

	summary_json is the stripped content of
	<results_path>feature_summary_<label>.json, or "" when the file is absent.

	default_display is always written as 1. Returns None.
	"""
	print("Adding " + source + " dataset to admin table with config " + configfile + " for label " + label)
	if description == "":
		# not general, revisit this to enter all TCGA known cancers
		# Order matters: names are appended in this order when several match.
		known_cancers = (
			(("brca", "BRCA"), "Breast"),
			(("ov", "OV"), "Ovarian"),
			(("gbm", "GBM"), "Glioblastoma"),
			(("coadread", "COAD", "coad", "crc", "CRC"), "ColoRectal"),
			(("cesc", "CESC"), "Cervical"),
			(("hnsc", "HNSC"), "HeadNeck"),
			(("kirc", "KIRC", "kirp", "KIRP"), "Kidney"),
			(("luad", "LUAD", "lusc", "LUSC"), "Lung"),
			(("stad", "STAD"), "Stomach"),
		)
		for tokens, cancer_name in known_cancers:
			if any(token in label for token in tokens):
				description = description + cancer_name
		# "nomask" labels are left alone; every other "mask" label is filtered.
		if "nomask" not in label and "mask" in label:
			description = description + " filtered"

	inputfiles = "{matrix:" + feature_matrix + ",associations:" + associations + "}"
	if comments == "":
		comments = inputfiles
	config = db_util.getConfig(configfile)

	max_logpv = -1.0
	meta_path = results_path + "edges_out_" + label + "_meta.json"
	if os.path.exists(meta_path):
		# "with" closes the handle even when the parse below re-raises.
		with open(meta_path, "r") as meta_json_file:
			metaline = meta_json_file.read()
		if len(metaline) > 1:
			try:
				max_logpv = json.loads(metaline)["max_logpv"]
			except ValueError:
				# okay that the max_logpv is not set
				max_logpv = -1.0
			except Exception:
				print("Unexpected error: %s" % (sys.exc_info()[0],))
				raise

	summary_json = ""
	summary_path = results_path + "feature_summary_" + label + ".json"
	if os.path.exists(summary_path):
		with open(summary_path, "r") as summary_file:
			summary_json = summary_file.read().strip()

	# NOTE(review): values are interpolated straight into the SQL text, so any
	# value containing a single quote (summary_json is raw file content) will
	# break the statement. Use bound parameters if db_util offers them.
	insertSql = "replace into tcga.regulome_explorer_dataset (label,method,source,contact,comments,dataset_date,description,max_logged_pvalue, input_files, default_display,disease,summary_json) values ('%s', '%s', '%s', '%s', '%s', '%s', '%s', %f, '%s', '%i', '%s', '%s');" % (
		label, method, source, contact, comments, ds_date, description,
		max_logpv, inputfiles, 1, disease, summary_json)
	print("updating regulome_explorer_dataset\n" + insertSql)
	db_util.executeInsert(config, insertSql)
def populate_sample_meta(sampleList, config):
	"""
	Write one sample_meta row per entry of sampleList.

	sampleList needs to be a list of patients; each entry becomes the
	sample_key and its position in the list becomes matrix_col_offset.
	config is handed unchanged to db_util.executeInsert.

	cancer_type is the part of the module-level dataset_label before the
	first "_". meta_json is a fixed placeholder string.
	"""
	cancer_type = dataset_label.split("_")[0]
	# NOTE(review): the dataset_label column is written as "" rather than the
	# module-level dataset_label -- confirm this is intended.
	row_label = ""
	placeholder_meta = "{age:X,status:someStatus,comments:some comments}"
	row_sql = "replace into sample_meta (sample_key,cancer_type,dataset_label,matrix_col_offset,meta_json) values ('%s', '%s', '%s', '%i', '%s');"

	for col_offset, sample_key in enumerate(sampleList):
		statement = row_sql % (sample_key, cancer_type, row_label, col_offset, placeholder_meta)
		db_util.executeInsert(config, statement)
	print("Done populating sample list for " + dataset_label)
def populate_sample_meta(sampleList, config):
    """
    Write one sample_meta row per entry of sampleList.

    sampleList needs to be a list of patients; each entry becomes the
    sample_key and its position in the list becomes matrix_col_offset.
    config is handed unchanged to db_util.executeInsert.

    cancer_type is the part of the module-level dataset_label before the
    first "_". meta_json is a fixed placeholder string.
    """
    cancer_type = dataset_label.split("_")[0]
    # NOTE(review): the dataset_label column is written as "" rather than the
    # module-level dataset_label -- confirm this is intended.
    row_label = ""
    placeholder_meta = "{age:X,status:someStatus,comments:some comments}"
    row_sql = "replace into sample_meta (sample_key,cancer_type,dataset_label,matrix_col_offset,meta_json) values ('%s', '%s', '%s', '%i', '%s');"

    for col_offset, sample_key in enumerate(sampleList):
        statement = row_sql % (sample_key, cancer_type, row_label, col_offset, placeholder_meta)
        db_util.executeInsert(config, statement)
    print("Done populating sample list for " + dataset_label)
        sys.exit(-1)
    if operation.upper() == "ADD":
        pathwaymembers = raw_input("Enter pathway members(required and comma separated) e.g.\nTP53,GENE1,GENE2...\n")
        pathwayurl = raw_input("Enter pathway source url(optional)\n")

        if len(pathwaymembers) < 1:
            print "Invalid pathway defined, check your inputs"
            sys.exit(-1)
            # print "name %s\n members %s\n source %s\n url%s" %(pathwayname, pathwaymembers, pathwaysource, pathwayurl)
        insertSql = "insert into random_forest.pathways values('%s', '%s', '%s', '%s')" % (
            pathwaysource,
            pathwayname,
            pathwayurl,
            pathwaymembers,
        )
        rc = db_util.executeInsert(config, insertSql)
        if rc >= 0:
            print "%s added" % pathwayname
        else:
            print "Problems with adding - return code is %i" % rc
    elif operation.upper() == "DELETE":
        deleteSql = "delete from random_forest.pathways where pname = '%s' and psource = '%s'" % (
            pathwayname,
            pathwaysource,
        )
        rc = db_util.executeInsert(config, deleteSql)
        if rc >= 0:
            print "%s removed" % pathwayname
        else:
            print "Problems with deleting - return code is %i" % rc
    else:
	operation = raw_input("ADD or DELETE pathways?(required)\n")
	pathwayname = raw_input("Enter pathway name(required)\n")
	pathwaysource = raw_input("Enter pathway source(required but custom okay)\n")
	
	if (len(pathwayname) < 1 or len(pathwaysource) < 1):
		print "Invalid pathway defined, check your inputs"
		sys.exit(-1)
	if (operation.upper() == "ADD"):
		pathwaymembers = raw_input("Enter pathway members(required and comma separated) e.g.\nTP53,GENE1,GENE2...\n")
		pathwayurl = raw_input("Enter pathway source url(optional)\n")
        
		if (len(pathwaymembers) < 1):
			print "Invalid pathway defined, check your inputs"
			sys.exit(-1)
		#print "name %s\n members %s\n source %s\n url%s" %(pathwayname, pathwaymembers, pathwaysource, pathwayurl) 
		insertSql = "insert into random_forest.pathways values('%s', '%s', '%s', '%s')" %(pathwaysource, pathwayname, pathwayurl,pathwaymembers)
		rc = db_util.executeInsert(config, insertSql)
		if (rc >= 0):
			print "%s added" %pathwayname
		else:
			print "Problems with adding - return code is %i" % rc
	elif (operation.upper() == "DELETE"):
		deleteSql = "delete from random_forest.pathways where pname = '%s' and psource = '%s'" %(pathwayname, pathwaysource)
		rc = db_util.executeInsert(config, deleteSql)
		if (rc >= 0):
			print "%s removed" %pathwayname
		else:
			print "Problems with deleting - return code is %i" % rc
	else:
		print "operation %s not supported" %(operation)	
def addDataset(
    label,
    feature_matrix,
    associations,
    method,
    source,
    description,
    comments,
    configfile,
    results_path,
    ds_date,
    disease,
    contact,
):
    """Insert or replace one row of tcga.regulome_explorer_dataset.

    label          -- dataset label; also used to locate the result files and,
                      when description is empty, to derive a description
    feature_matrix -- recorded in the input_files column (and in comments
                      when comments is empty)
    associations   -- recorded alongside feature_matrix
    method, source, contact, ds_date, disease -- stored as given
    description    -- if "", built from the cancer-type tokens found in label,
                      with " filtered" appended when label contains "mask"
                      but not "nomask"
    comments       -- if "", defaults to the same text as input_files
    configfile     -- passed to db_util.getConfig to obtain the db config
    results_path   -- prefix that is concatenated directly with the result
                      file names, so it must already end with a path separator

    max_logged_pvalue is read from the "max_logpv" key of
    <results_path>edges_out_<label>_meta.json when that file exists and holds
    more than one character; otherwise, or when the content is not valid
    JSON, it is -1.0. Any other failure while reading the key (for example a
    missing "max_logpv" key) is printed and re-raised.

    summary_json is the stripped content of
    <results_path>feature_summary_<label>.json, or "" when the file is absent.

    default_display is always written as 1. Returns None.
    """
    print("Adding " + source + " dataset to admin table with config " + configfile + " for label " + label)
    if description == "":
        # not general, revisit this to enter all TCGA known cancers
        # Order matters: names are appended in this order when several match.
        known_cancers = (
            (("brca", "BRCA"), "Breast"),
            (("ov", "OV"), "Ovarian"),
            (("gbm", "GBM"), "Glioblastoma"),
            (("coadread", "COAD", "coad", "crc", "CRC"), "ColoRectal"),
            (("cesc", "CESC"), "Cervical"),
            (("hnsc", "HNSC"), "HeadNeck"),
            (("kirc", "KIRC", "kirp", "KIRP"), "Kidney"),
            (("luad", "LUAD", "lusc", "LUSC"), "Lung"),
            (("stad", "STAD"), "Stomach"),
        )
        for tokens, cancer_name in known_cancers:
            if any(token in label for token in tokens):
                description = description + cancer_name
        # "nomask" labels are left alone; every other "mask" label is filtered.
        if "nomask" not in label and "mask" in label:
            description = description + " filtered"

    inputfiles = "{matrix:" + feature_matrix + ",associations:" + associations + "}"
    if comments == "":
        comments = inputfiles
    config = db_util.getConfig(configfile)

    max_logpv = -1.0
    meta_path = results_path + "edges_out_" + label + "_meta.json"
    if os.path.exists(meta_path):
        # "with" closes the handle even when the parse below re-raises.
        with open(meta_path, "r") as meta_json_file:
            metaline = meta_json_file.read()
        if len(metaline) > 1:
            try:
                max_logpv = json.loads(metaline)["max_logpv"]
            except ValueError:
                # okay that the max_logpv is not set
                max_logpv = -1.0
            except Exception:
                print("Unexpected error: %s" % (sys.exc_info()[0],))
                raise

    summary_json = ""
    summary_path = results_path + "feature_summary_" + label + ".json"
    if os.path.exists(summary_path):
        with open(summary_path, "r") as summary_file:
            summary_json = summary_file.read().strip()

    # NOTE(review): values are interpolated straight into the SQL text, so any
    # value containing a single quote (summary_json is raw file content) will
    # break the statement. Use bound parameters if db_util offers them.
    insertSql = (
        "replace into tcga.regulome_explorer_dataset (label,method,source,contact,comments,dataset_date,description,max_logged_pvalue, input_files, default_display,disease,summary_json) values ('%s', '%s', '%s', '%s', '%s', '%s', '%s', %f, '%s', '%i', '%s', '%s');"
        % (
            label,
            method,
            source,
            contact,
            comments,
            ds_date,
            description,
            max_logpv,
            inputfiles,
            1,
            disease,
            summary_json,
        )
    )
    print("updating regulome_explorer_dataset\n" + insertSql)
    db_util.executeInsert(config, insertSql)