Code example #1
def addDataset(label, feature_matrix, associations, method, source, description, comments, configfile, results_path, ds_date, disease, contact):
	print "Adding " + source + " dataset to admin table with config " + configfile + " for label " + label 
	if (description == ""):
		#not general, revisit this to enter all TCGA known cancers
		if (label.find("brca") != -1 or label.find("BRCA") != -1):
			description = "Breast"
		if (label.find("ov") != -1 or label.find("OV") != -1):
			description = description + "Ovarian"
		if (label.find("gbm") != -1 or label.find("GBM") != -1):
			description = description + "Glioblastoma"
		if (label.find("coadread") != -1 or label.find("COAD") != -1 or label.find("coad") != -1 or label.find("crc") != -1 or label.find("CRC") != -1):
			description = description + "ColoRectal"
		if (label.find("cesc") != -1 or label.find("CESC") != -1):
			description = description + "Cervical"
		if (label.find("hnsc") != -1 or label.find("HNSC") != -1):
			description = description + "HeadNeck"
		if (label.find("kirc") != -1 or label.find("KIRC") != -1 or label.find("kirp") != -1  or label.find("KIRP") != -1):
			description = description + "Kidney"
		if (label.find("luad") != -1 or label.find("LUAD") != -1 or label.find("lusc") != -1  or label.find("LUSC") != -1):
			description = description + "Lung"
		if (label.find("stad") != -1 or label.find("STAD") != -1):
			description = description + "Stomach"	
		if (label.find("nomask") != -1):
			description = description
		elif (label.find("mask") != -1):
			description = description + " filtered"
		
	if (comments == ""):
		comments = "{matrix:"+feature_matrix+",associations:"+associations+"}"
	inputfiles = "{matrix:"+feature_matrix+",associations:"+associations+"}"
	currentDate = time.strftime("%m-%d-%y")	# computed but unused; ds_date is what gets stored
	config = db_util.getConfig(configfile)
	max_logpv = -1.0
	if (os.path.exists(results_path + 'edges_out_' + label + '_meta.json')):
		meta_json_file = open(results_path + 'edges_out_' + label + '_meta.json','r')
		metaline = meta_json_file.read()
		if (len(metaline) > 1):
			try:
				max_logpv = json.loads(metaline)["max_logpv"]
			except ValueError:
				max_logpv = -1
				#okay that the max_logpv is not set
			except:
				print "Unexpected error:", sys.exc_info()[0]
				raise
		meta_json_file.close()	
	summary_json = ""
	if (os.path.exists(results_path + "feature_summary_" + label + ".json")):
		summary_file = open(results_path + "feature_summary_" + label + ".json", "r")
		summary_json = summary_file.read().strip()
		summary_file.close()	
	insertSql = "replace into tcga.regulome_explorer_dataset (label,method,source,contact,comments,dataset_date,description,max_logged_pvalue, input_files, default_display,disease,summary_json) values ('%s', '%s', '%s', '%s', '%s', '%s', '%s', %f, '%s', '%i', '%s', '%s');" %(label, method, source, contact, comments,ds_date,description, max_logpv, inputfiles, 1, disease, summary_json)
	print "updating regulome_explorer_dataset\n" + insertSql
	db_util.executeInsert(config, insertSql)
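The replace-into statement above is assembled by % interpolation, so a single quote inside comments, description, or summary_json breaks the SQL. A parameterized variant is safer; the sketch below assumes a MySQLdb connection is available (the excerpt only shows db_util.executeInsert, so this is not the project's actual API):

import MySQLdb

def insert_dataset_row(conn, label, method, source, contact, comments,
                       ds_date, description, max_logpv, inputfiles,
                       disease, summary_json):
    sql = ("replace into tcga.regulome_explorer_dataset "
           "(label,method,source,contact,comments,dataset_date,description,"
           "max_logged_pvalue,input_files,default_display,disease,summary_json) "
           "values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)")
    cursor = conn.cursor()
    # MySQLdb escapes each bound value itself, so embedded quotes are safe
    cursor.execute(sql, (label, method, source, contact, comments, ds_date,
                         description, max_logpv, inputfiles, 1, disease,
                         summary_json))
    conn.commit()
    cursor.close()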
Code example #2
def loadConfig(env):
    configFile = ""
    if env == "internal":
        configFile = "../config/rfex_sql_sandbox.config"
    elif env == "":
        configFile = "../config/rfex_sql.config"
    elif env == "gdac":
        configFile = "../config/rfex_sql_gdac.config"
    else:
        print "The env selected is invalid " + env
        sys.exit(-1)
    config = db_util.getConfig(configFile)
    return config
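For reference, the three accepted env values map to config files as follows (hypothetical calls):

config = loadConfig("internal")  # ../config/rfex_sql_sandbox.config
config = loadConfig("")          # ../config/rfex_sql.config (default)
config = loadConfig("gdac")      # ../config/rfex_sql_gdac.config

Any other value prints an error and exits.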
Code example #3
def main(dataset_label, feature_matrix, associations, pvalueRepresentation, configfile, resultsPath, doPubcrawl, contacts, keep_unmapped, featureInterestingFile):
	print "\n in parse_pairwise : dataset_label = <%s> \n" % dataset_label
	config = db_util.getConfig(configfile)
	#results_path = db_util.getResultsPath(config)
	#if (not os.path.exists(results_path + "/" + dataset_label)):
	#	os.mkdir(results_path + "/" + dataset_label)
	print "Done with processing features, processing pairwise edges %s " %(time.ctime())
	pvlambda = db_util.reflective
	if (pvalueRepresentation == "negative"):
		pvlambda = db_util.negative
	elif (pvalueRepresentation == "negative_log10"):
		pvlambda = db_util.negative_log10
	elif (pvalueRepresentation == "absolute"):
		pvlambda = db_util.absolute
	process_pairwise_edges(dataset_label, feature_matrix, associations, pvlambda, config, resultsPath, doPubcrawl,  contacts, int(keep_unmapped), featureInterestingFile)
	print "Done with processing pairwise edges %s " %(time.ctime())
Code example #4
def updateFromTemplate(label, template, configfile, resultsPath):
	template_file = open(template)
	schema_out_name = template_file.name.replace('template', label)
	schema_out_name = schema_out_name.replace('sql', "sql_processing", 1)
	sql_processing_dir = resultsPath + "/sql_processing"
	if (not os.path.exists(sql_processing_dir)):
		os.mkdir(sql_processing_dir)	# avoids shelling out via os.system
		os.chmod(sql_processing_dir, 0777)
	schema_out_name = sql_processing_dir + "/" + schema_out_name.split("/")[-1]
	schema_file = open(schema_out_name,'w')
	config = db_util.getConfig(configfile)
	schema_file.write("use %s;\n" %(db_util.getDBSchema(config)))
	for line in template_file:
		schema_file.write(line.replace(placeHolder, label))
	schema_file.close()
	template_file.close()
	executeSchema(schema_file.name, config)
	print "Done creating schema file from template %s" % time.ctime()
Code example #5

def getFeatures():
    return features_hash


def getFeatureId(featureStr):
    return features_hash.get(featureStr)


if __name__ == "__main__":
    global dataset_label
    print "Parsing features kicked off %s" % time.ctime()
    if (len(sys.argv) < 7):
        print 'Usage is py2.6 parse_features_rfex.py data_matrix.tsv dataset_label configFile annotations quantileFeatures resultsPath'
        sys.exit(1)
    dataset_label = sys.argv[2]
    print "\nin parse_features_rfex : dataset_label = <%s>\n" % dataset_label
    configfile = sys.argv[3]
    config = db_util.getConfig(configfile)
    annotations = sys.argv[4]
    quantileFeatures = sys.argv[5]
    resultsPath = sys.argv[6]
    featureInterestingFile = ""
    if (len(sys.argv) == 8):
        featureInterestingFile = sys.argv[7]
    process_feature_matrix(dataset_label, sys.argv[1], 1, config, annotations,
                           quantileFeatures, resultsPath,
                           featureInterestingFile)
    print "Done with processing feature relating loads %s " % (time.ctime())
Code example #6
File: batchimporter.py Project: amergin/neo4j-import
    # fragment from earlier in the file; jarPath is defined above this excerpt
    if not os.access(jarPath, os.R_OK):
        print "Could not open JAR file at %s, check setting 'batch_path' in config. EXIT" % (jarPath)
        sys.exit(-1)


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print "Usage is py2.6 neo4j_csv.py batch_import.config"
        sys.exit(-1)

    config_file = sys.argv[1]
    if not os.access(config_file, os.R_OK):
        print "Could not open config file. EXIT"
        sys.exit(-1)

    config = db_util.getConfig(config_file)
    checkImportProgram(config)

    if not os.path.exists(getBatchTSVPath(config)):
        print "TSV directory does not exist, creating."
        os.makedirs(getBatchTSVPath(config))

    if not os.path.exists(getMysqlDumps(config)):
        print "Dump directory does not exist, creating"
        os.makedirs(getMysqlDumps(config))

    print "Data import started at %s" % (str(datetime.datetime.now()))

    importer = DatasetImporter(config)
    t = timeit.Timer(importer.start, "gc.enable()")
    importTime = t.timeit(1)
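Every example on this page funnels a path into db_util.getConfig. Its body is not shown in any excerpt; a plausible reading, offered purely as an assumption, is a thin wrapper over Python 2's ConfigParser:

# Assumed sketch of db_util.getConfig; the real implementation is not
# shown in any excerpt on this page.
from ConfigParser import ConfigParser

def getConfig(config_file):
    config = ConfigParser()
    config.read(config_file)  # a missing file yields an empty parser, not an error
    return config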