def process(inDir, outDir, dataDir, cancer, flog, PATHPATTERN, originCancer,
            REALRUN):
    """Convert the TCGA clinical/biospecimen tables in dataDir to cgData files.

    Every file in dataDir (except ``*.html``) whose name contains one of the
    six known patterns (clinical_sample, clinical_patient, clinical_follow_up,
    auxiliary, biospecimen_slide, biospecimen_sample) is normalised in place
    and exported under ``outDir + cancer + "/"`` as four files: the clinical
    matrix, its ``_clinicalFeature`` table, and a ``.json`` sidecar for each.

    Paths are built by plain string concatenation, so outDir and dataDir must
    already end with a path separator.

    Parameters:
        inDir        -- only used to derive the "url" metadata entry.
        outDir       -- output root directory (created when missing).
        dataDir      -- directory holding the raw tab-delimited files; the
                        files are REWRITTEN IN PLACE.
        cancer       -- cohort code; names the output sub-directory and keys
                        the TCGAUtil lookups.
        flog         -- writable log object; receives one line per feature
                        that is missing from TCGAUtil.featureLongTitle.
        PATHPATTERN  -- string appended to the dataset-name suffix (all
                        patterns except clinical_follow_up).
        originCancer -- cohort the files belong to; when it differs from
                        cancer it is appended to dataset names and the
                        follow-up / slide clean-up steps are skipped.
        REALRUN      -- when false, an already existing clinicalFeature file
                        is first refreshed from the TCGAUtil tables.

    Side effects: uses the scratch file ".tmp" in the current working
    directory.
    """
    # Function-scope import: the file-level import block is not visible here.
    import shutil

    #print status
    print("%s %s" % (cancer, __name__))

    #set output dir
    cancerOutDir = outDir + cancer + "/"
    if not os.path.exists(outDir):
        os.makedirs(outDir)
    if not os.path.exists(cancerOutDir):
        os.makedirs(cancerOutDir)

    patterns = ("clinical_sample", "clinical_patient", "clinical_follow_up",
                "auxiliary", "biospecimen_slide", "biospecimen_sample")
    # dataset-name prefix per pattern; follow_up uses the file name instead
    datasetLabels = {
        "clinical_sample": "clinSample",
        "clinical_patient": "clinPatient",
        "auxiliary": "clinAuxiliary",
        "biospecimen_slide": "bioSlide",
        "biospecimen_sample": "bioSample",
    }
    crossCohort = cancer != originCancer

    #data processing
    for fileName in os.listdir(dataDir):
        if fileName[-5:] == ".html":
            continue
        for pattern in patterns:
            if pattern not in fileName:
                continue

            followUpV = 0.0
            cgFileName = fileName.replace(".txt", "")
            # follow_up files carry a "_v<n>.<n>" version tag; the dot is
            # escaped so that only a literal "." is accepted
            if re.search(r'_v[1-9]+\.[0-9]+', cgFileName):
                followUpV = cgFileName.split("follow_up_")[1].split(
                    "_" + cancer.lower())[0][1:]
            # the auxiliary file name does not start with "clinical_"
            if cgFileName[0:9] != "clinical_":
                cgFileName = "clinical_" + cgFileName

            outfile = cancerOutDir + cgFileName
            cFfile = outfile + "_clinicalFeature"

            # NOTE(review): the feature file is regenerated from scratch
            # further below, so this refresh only survives when one of the
            # "continue" guards skips the file -- confirm that is intended.
            if not REALRUN and os.path.exists(cFfile):
                _refreshClinicalFeatureFile(cFfile, cancer)

            infile = dataDir + fileName
            # An empty file has no header row for the csv rewrite below,
            # so skip it here instead of failing inside the csv module.
            if os.path.getsize(infile) == 0:
                continue
            _padShortRows(infile)

            if pattern == "clinical_follow_up":
                print(fileName)
                if not crossCohort:
                    cleanupFollowUpFile(infile, ".tmp")
                    shutil.copyfile(".tmp", infile)

            # the slide file is rebuilt because columns have to be
            # duplicated as top or bottom
            if pattern == "biospecimen_slide":
                print(fileName)
                if not crossCohort:
                    cleanupSlideFile(infile, ".tmp")
                    shutil.copyfile(".tmp", infile)

            #clinicalMatrix
            AllowDupCol = True
            if "biospecimen_" in pattern:
                SkipLines = [2]
            else:
                SkipLines = [1, 3]  # 1based

            # the clean-up steps above may have produced an empty file
            if os.path.getsize(infile) == 0:
                continue

            if pattern == "biospecimen_slide":
                FirstColAuto = 0  #0 based,  already cleaned
            else:
                FirstColAuto = findIDCol(infile)
                if FirstColAuto == -1:
                    print("%s bad header line" % infile)
                    continue
            clinMatrix = ClinicalMatrixNew(infile, "foo", FirstColAuto, None,
                                           SkipLines, AllowDupCol)

            clinMatrix.removeCols(["ethnicity", "race", "jewish_origin"])

            if pattern in ("clinical_sample", "biospecimen_sample"):
                for col in ("sample_type", "sample_type_id"):
                    if col in clinMatrix.getCOLs():
                        add_col_PseudoSample(clinMatrix, col)

            # remove all uuid and day_of columns; iterate over a copy because
            # removeCols may mutate the list getCOLs() hands back
            for col in list(clinMatrix.getCOLs()):
                if "uuid" in col or "UUID" in col or "day_of" in col:
                    clinMatrix.removeCols([col])

            # blank out placeholder values, then normalise spellings that
            # differ between BCR submissions (order kept from the original)
            for token in _EMPTY_VALUE_TOKENS:
                clinMatrix.replaceValue(token, "")
            clinMatrix.replaceValueWhole("|", "")
            for old, new in _VALUE_RENAMES:
                clinMatrix.replaceValue(old, new)

            with open(outfile, "w") as oHandle:
                clinMatrix.store(oHandle, validation=True)

            #clinicalFeature
            _writeClinicalFeatureFile(cFfile, clinMatrix, cancer, flog)

            #json
            if pattern == "clinical_follow_up":
                suffix = cgFileName
            else:
                suffix = datasetLabels[pattern] + PATHPATTERN
            if crossCohort:
                suffix = suffix + originCancer

            today = datetime.date.today().isoformat()
            cohortLabel = TCGAUtil.cancerHumanReadable[
                cancer] + " (" + cancer + ")"

            # keys are inserted in the same order as before so that the
            # serialised output does not change
            J = {}
            cFJ = {}
            J["cgDataVersion"] = 1
            J["redistribution"] = True
            J["dataProducer"] = "TCGA biospecimen core resource"
            # NOTE(review): tmpDir is not defined in this function; it is
            # presumably a module-level global -- confirm.
            J["url"] = (TCGAUtil.remoteBase +
                        inDir.replace(TCGAUtil.localBase, "") +
                        dataDir.replace(tmpDir, "")[:-1])
            J["version"] = today
            J["wrangler"] = ("cgData TCGAscript " + __name__ +
                             " processed on " + today)
            J["dataSubType"] = "phenotype"
            J["wrangling_procedure"] = "Clinical data download from TCGA DCC, processed at UCSC into cgData repository"
            J["description"] = ("This dataset is the TCGA " + cohortLabel +
                                " clinical data.")
            J["name"] = "TCGA_" + cancer + "_" + suffix

            cFJ["name"] = J["name"] + "_clinFeat"
            cFJ["type"] = "clinicalFeature"

            J["type"] = "clinicalMatrix"
            J[":sampleMap"] = "TCGA." + cancer + ".sampleMap"
            J["cohort"] = "TCGA " + cohortLabel
            J[":clinicalFeature"] = cFJ["name"]
            if pattern == "clinical_follow_up":
                if crossCohort:
                    J["upToDate"] = str(followUpV) + "_" + originCancer
                else:
                    J["upToDate"] = str(followUpV)

            with open(outfile + ".json", "w") as oHandle:
                oHandle.write(json.dumps(J, indent=-1))
            with open(cFfile + ".json", "w") as oHandle:
                oHandle.write(json.dumps(cFJ, indent=-1))
    return


# Cell values replaced by the empty string, in the original call order.
_EMPTY_VALUE_TOKENS = (
    "null", "NULL", "Null", "NA",
    "[null]", "[NULL]", "[Null]", "[NA]",
    "[Not Available]", "[Not Applicable]", "[Unknown]", "[Not Reported]",
    "[Not Requested]", "[Not Evaluated]", "[Completed]", "[Pending]",
    "Not Tested", "[]", ",\"", "\"", "'", "`", "||",
)

# (old, new) value spellings unified after the empty tokens were removed.
_VALUE_RENAMES = (
    ("LUNG", "Lung"),
    ("MSS|MSS", "MSS"),
    ("Alive", "LIVING"),
    ("ALIVE", "LIVING"),
    ("alive", "LIVING"),
    ("Dead", "DECEASED"),
    ("DEAD", "DECEASED"),
    ("dead", "DECEASED"),
)

# Features that are always written with visibility "on".
_DEFAULT_VISIBLE_FEATURES = (
    "gender", "age_at_initial_pathologic_diagnosis",
    "days_to_last_followup", "days_to_last_known_alive", "sample_type",
    "mononucleotide_and_dinucleotide_marker_panel_analysis_status",
    "percent_stromal_cells_BOTTOM", "percent_tumor_nuclei_BOTTOM",
)


def _refreshClinicalFeatureFile(cFfile, cancer):
    """Re-apply the TCGAUtil priority/state-order/value-type tables to cFfile."""
    tmpClinFeature = ClinicalFeatureNew(cFfile, "tmpName")
    for feature in tmpClinFeature.getFeatures():
        if cancer in TCGAUtil.featurePriority:
            if feature in TCGAUtil.featurePriority[cancer]:
                priority = TCGAUtil.featurePriority[cancer][feature]
                tmpClinFeature.setFeaturePriority(feature, priority)
                tmpClinFeature.setFeatureVisibility(feature, "on")

        stateOrder = None
        if feature in TCGAUtil.featureStateOrder:
            orders = TCGAUtil.featureStateOrder[feature]
            if cancer in orders:
                stateOrder = orders[cancer]
            # an "ALL" entry overrides the cohort-specific one (as before)
            if "ALL" in orders:
                stateOrder = orders["ALL"]
            print(stateOrder)
        if stateOrder:
            tmpClinFeature.setFeatureValueType(feature, "category")
            tmpClinFeature.setFeatureStates(feature, stateOrder)
            tmpClinFeature.setFeatureStateOrder(feature, stateOrder)
            tmpClinFeature.setFeatureStateOrderRelax(feature, "true")

        if feature in TCGAUtil.valueType:
            tmpClinFeature.setFeatureValueType(feature,
                                               TCGAUtil.valueType[feature])

    with open(cFfile, 'w') as fout:
        tmpClinFeature.store(fout)


def _padShortRows(infile):
    """Rewrite infile in place so short rows are padded with empty fields.

    Rows often have fewer fields than the header; csv.DictReader fills the
    missing ones with restval and csv.DictWriter writes them back out.
    """
    import shutil

    with open(infile, 'r') as fin:
        reader = csv.DictReader(fin, delimiter="\t", restval="")
        with open(".tmp", 'w') as fout:
            writer = csv.DictWriter(fout,
                                    delimiter="\t",
                                    fieldnames=reader.fieldnames)
            writer.writer.writerow(reader.fieldnames)
            writer.writerows(reader)
    # copy instead of shelling out to "cp": safe for any path and raises
    # on failure instead of being silently ignored
    shutil.copyfile(".tmp", infile)


def _writeClinicalFeatureFile(cFfile, clinMatrix, cancer, flog):
    """Write the clinicalFeature table for every column of clinMatrix."""
    with open(cFfile, "w") as fout:
        fout.write("#feature\tattribute\tvalue\n")
        for feature in clinMatrix.getCOLs():
            if feature not in TCGAUtil.featureLongTitle:
                longTitle = feature
                shortTitle = feature
                message = "Feature Not in dictionary" + "\t" + feature + "\t" + feature
                flog.write(message + "\n")
            else:
                longTitle = TCGAUtil.featureLongTitle[feature]
                if feature in TCGAUtil.featureShortTitle:
                    shortTitle = TCGAUtil.featureShortTitle[feature]
                else:
                    shortTitle = longTitle

            fout.write(feature + "\tshortTitle\t" + shortTitle + "\n")
            fout.write(feature + "\tlongTitle\t" + longTitle + "\n")
            if "uuid" in feature or "UUID" in feature:
                fout.write(feature + "\tvisibility\toff\n")
            if feature in TCGAUtil.valueType:
                fout.write(feature + "\tvalueType\t" +
                           TCGAUtil.valueType[feature] + "\n")

            stateOrder = None
            if feature in TCGAUtil.featureStateOrder:
                orders = TCGAUtil.featureStateOrder[feature]
                # the valueType line is written once per matching entry,
                # exactly as before, to keep the output unchanged
                if cancer in orders:
                    fout.write(feature + "\tvalueType\tcategory\n")
                    stateOrder = orders[cancer]
                if "ALL" in orders:
                    fout.write(feature + "\tvalueType\tcategory\n")
                    stateOrder = orders["ALL"]
                if stateOrder:
                    for state in stateOrder:
                        fout.write(feature + "\tstate\t" + state + "\n")
                    fout.write(feature + "\tstateOrder\t\"" +
                               "\",\"".join(stateOrder) + "\"\n")
                    fout.write(feature + "\tstateOrderRelax\ttrue\n")

            if cancer in TCGAUtil.featurePriority:
                if feature in TCGAUtil.featurePriority[cancer]:
                    priority = TCGAUtil.featurePriority[cancer][feature]
                    fout.write(feature + "\tpriority\t" + str(priority) +
                               "\n")
                    fout.write(feature + "\tvisibility\ton\n")

            if feature in _DEFAULT_VISIBLE_FEATURES:
                fout.write(feature + "\tvisibility\ton\n")
# Example no. 2
0
def process(inDir, outDir, cancer, flog, PATHPATTERN, originCancer):
    """Re-export the curated clinical matrices found in inDir.

    A file is processed when its first six characters equal PATHPATTERN, a
    ``<file>.json`` sidecar exists next to it, and that sidecar has type
    "clinicalMatrix".  The matrix (and, when the sidecar names one, its
    clinicalFeature file) is cleaned and written with its JSON sidecar to
    ``outDir + cancer + "/"``.

    Paths are built by plain string concatenation, so inDir and outDir must
    already end with a path separator.

    Parameters:
        inDir        -- directory scanned for matrices, features and sidecars.
        outDir       -- output root directory (created when missing).
        cancer       -- cohort code; names the output sub-directory and keys
                        the TCGAUtil lookups.
        flog         -- writable log object; receives the "bad object name"
                        message.
        PATHPATTERN  -- prefix a file name must start with.
        originCancer -- cohort the files belong to; when it differs from
                        cancer, "_" + originCancer is appended to object
                        names and output file names.

    Processing stops (returns) at the first matrix whose name is rejected by
    trackName_fix.
    """
    #print status
    print("%s %s" % (cancer, __name__))

    #set output dir
    cancerOutDir = outDir + cancer + "/"
    if not os.path.exists(outDir):
        os.makedirs(outDir)
    if not os.path.exists(cancerOutDir):
        os.makedirs(cancerOutDir)

    crossCohort = cancer != originCancer

    for fileName in os.listdir(inDir):
        clinFeature = None
        clinFfile = ""
        clinFJ = None

        #find the clinMatrix file
        # NOTE(review): the slice length 6 is hard-coded, so a PATHPATTERN
        # of any other length never matches -- confirm before generalising.
        if fileName[0:6] != PATHPATTERN:
            continue
        infile = inDir + fileName
        if not os.path.exists(infile + ".json"):
            continue

        #json file processing (validation)
        with open(infile + ".json", "r") as fjson:
            J = json.load(fjson)
        if J.get("type") != "clinicalMatrix":
            continue

        #clinFeature: look for the sidecar whose name the matrix refers to
        if ":clinicalFeature" in J:
            clinFname = J[":clinicalFeature"]

            for clinFfile in os.listdir(inDir):
                sidecar = inDir + clinFfile + ".json"
                if not os.path.exists(sidecar):
                    continue
                with open(sidecar, "r") as fjson:
                    clinFJ = json.load(fjson)

                # .get(): sidecars without "type"/"name" are skipped instead
                # of aborting the whole run with a KeyError
                if (clinFJ.get("type") != "clinicalFeature"
                        or clinFJ.get("name") != clinFname):
                    continue

                print("%s %s" % (originCancer, cancer))
                if crossCohort:
                    clinFname = clinFname + "_" + originCancer
                    clinFJ["name"] = clinFname
                clinFeature = ClinicalFeatureNew(inDir + clinFfile, clinFname)
                for feature in clinFeature.getFeatures():
                    if cancer in TCGAUtil.featurePriority:
                        if feature in TCGAUtil.featurePriority[cancer]:
                            priority = TCGAUtil.featurePriority[cancer][
                                feature]
                            clinFeature.setFeaturePriority(feature, priority)
                            clinFeature.setFeatureVisibility(feature, "on")
                break

        #data processing
        clinMatrix = ClinicalMatrixNew(infile, J["name"], False, clinFeature)
        clinMatrix.removeCols(["ethnicity", "race", "jewish_origin"])
        # blank out placeholder values (order kept from the original calls)
        for token in ("null", "NULL", "Null", "NA",
                      "[null]", "[NULL]", "[Null]", "[NA]",
                      "[Not Available]", "[Not Reported]",
                      "[Not Applicable]", "[Not Requested]",
                      "[Completed]", "[Pending]", "Not Tested", "[]",
                      ",\"", "\"", "'", "`", "|"):
            clinMatrix.replaceValue(token, "")

        #json metadata (J is still the unmodified sidecar content)
        if crossCohort:
            J['name'] = J['name'] + "_" + originCancer

        J[":sampleMap"] = "TCGA." + cancer + ".sampleMap"
        J["cohort"] = "TCGA " + TCGAUtil.cancerHumanReadable[
            cancer] + " (" + cancer + ")"
        name = trackName_fix(J['name'])
        # this code treats a False return from trackName_fix as failure
        if name == False:
            message = "bad object name, need fix otherwise break loader, too long " + J[
                "name"]
            print(message)
            flog.write(message + "\n")
            # NOTE(review): this abandons all remaining files too -- confirm
            # that "continue" is not what was meant.
            return
        J["name"] = name

        if crossCohort and ":clinicalFeature" in J:
            J[":clinicalFeature"] = J[":clinicalFeature"] + "_" + originCancer

        J["cgDataVersion"] = 1

        #output matrix
        outfile = cancerOutDir + fileName
        if crossCohort:
            outfile = outfile + "_" + originCancer

        with open(outfile, "w") as oHandle:
            clinMatrix.store(oHandle, validation=True)
        with open(outfile + ".json", "w") as fjson:
            json.dump(J, fjson, indent=-1)

        #output clinFeature
        if clinFeature:
            outfile = cancerOutDir + clinFfile
            if crossCohort:
                outfile = outfile + "_" + originCancer
            with open(outfile, 'w') as fout:
                clinFeature.store(fout)

            clinFJ["cgDataVersion"] = 1
            with open(outfile + ".json", "w") as fjson:
                json.dump(clinFJ, fjson, indent=-1)
    return
# Example no. 3
0
# Command-line driver: build a curated clinicalFeature file.
# Usage: python curatedPhenotype.py originalClinFeature(optional) --run
# With a file argument the existing feature file is loaded and curated;
# without one an empty feature set is curated and a minimal JSON sidecar is
# written as well.  The result always goes to "newClinFeature" in the
# current working directory.
try:
    opts, args = getopt.getopt(sys.argv[1:], "", ["run"])
except getopt.GetoptError:
    print("python curatedPhenotype.py originalClinFeature(optional) --run")
    sys.exit()


output = "newClinFeature"
clinFeature = None

if args:
    clinFeatureFile = args[0]
    if os.path.exists(clinFeatureFile):
        clinFeature = ClinicalFeatureNew(clinFeatureFile, 'feature')
    else:
        print("%s does not exist" % args[0])
        sys.exit()
else:
    clinFeature = ClinicalFeatureNew(None, 'feature')
    J = {}
    J["type"] = "clinicalFeature"
    with open(output + ".json", 'w') as fout:
        fout.write(json.dumps(J, indent=2))

curatedPhenotypeClinFeature(clinFeature)

# "with" guarantees the result is flushed and closed before the script ends
with open(output, 'w') as fout:
    clinFeature.store(fout)
print("output: %s" % output)
#https://docs.python.org/3.1/library/getopt.html
# Command-line driver: build a curated clinicalFeature file.
# Usage: python curatedPhenotype.py originalClinFeature(optional) --run
# With a file argument the existing feature file is loaded and curated;
# without one an empty feature set is curated and a minimal JSON sidecar is
# written as well.  The result always goes to "newClinFeature" in the
# current working directory.
try:
    opts, args = getopt.getopt(sys.argv[1:], "", ["run"])
except getopt.GetoptError:
    print("python curatedPhenotype.py originalClinFeature(optional) --run")
    sys.exit()

output = "newClinFeature"
clinFeature = None

if args:
    clinFeatureFile = args[0]
    if os.path.exists(clinFeatureFile):
        clinFeature = ClinicalFeatureNew(clinFeatureFile, 'feature')
    else:
        print("%s does not exist" % args[0])
        sys.exit()
else:
    clinFeature = ClinicalFeatureNew(None, 'feature')
    J = {}
    J["type"] = "clinicalFeature"
    with open(output + ".json", 'w') as fout:
        fout.write(json.dumps(J, indent=2))

curatedPhenotypeClinFeature(clinFeature)

# "with" guarantees the result is flushed and closed before the script ends
with open(output, 'w') as fout:
    clinFeature.store(fout)
print("output: %s" % output)
# Example no. 5
0
def process(inDir, outDir, cancer, flog, PATHPATTERN, originCancer):
    """Re-export the curated clinical matrices found in inDir.

    A file is processed when its first six characters equal PATHPATTERN, a
    ``<file>.json`` sidecar exists next to it, and that sidecar has type
    "clinicalMatrix".  The matrix (and, when the sidecar names one, its
    clinicalFeature file) is cleaned and written with its JSON sidecar to
    ``outDir + cancer + "/"``.

    Paths are built by plain string concatenation, so inDir and outDir must
    already end with a path separator.

    Parameters:
        inDir        -- directory scanned for matrices, features and sidecars.
        outDir       -- output root directory (created when missing).
        cancer       -- cohort code; names the output sub-directory and keys
                        the TCGAUtil lookups.
        flog         -- writable log object; receives the "bad object name"
                        message.
        PATHPATTERN  -- prefix a file name must start with.
        originCancer -- cohort the files belong to; when it differs from
                        cancer, "_" + originCancer is appended to object
                        names and output file names.

    Processing stops (returns) at the first matrix whose name is rejected by
    trackName_fix.
    """
    #print status
    print("%s %s" % (cancer, __name__))

    #set output dir
    cancerOutDir = outDir + cancer + "/"
    if not os.path.exists(outDir):
        os.makedirs(outDir)
    if not os.path.exists(cancerOutDir):
        os.makedirs(cancerOutDir)

    crossCohort = cancer != originCancer

    for fileName in os.listdir(inDir):
        clinFeature = None
        clinFfile = ""
        clinFJ = None

        #find the clinMatrix file
        # NOTE(review): the slice length 6 is hard-coded, so a PATHPATTERN
        # of any other length never matches -- confirm before generalising.
        if fileName[0:6] != PATHPATTERN:
            continue
        infile = inDir + fileName
        if not os.path.exists(infile + ".json"):
            continue

        #json file processing (validation)
        with open(infile + ".json", "r") as fjson:
            J = json.load(fjson)
        if J.get("type") != "clinicalMatrix":
            continue

        #clinFeature: look for the sidecar whose name the matrix refers to
        if ":clinicalFeature" in J:
            clinFname = J[":clinicalFeature"]

            for clinFfile in os.listdir(inDir):
                sidecar = inDir + clinFfile + ".json"
                if not os.path.exists(sidecar):
                    continue
                with open(sidecar, "r") as fjson:
                    clinFJ = json.load(fjson)

                # .get(): sidecars without "type"/"name" are skipped instead
                # of aborting the whole run with a KeyError
                if (clinFJ.get("type") != "clinicalFeature"
                        or clinFJ.get("name") != clinFname):
                    continue

                print("%s %s" % (originCancer, cancer))
                if crossCohort:
                    clinFname = clinFname + "_" + originCancer
                    clinFJ["name"] = clinFname
                clinFeature = ClinicalFeatureNew(inDir + clinFfile, clinFname)
                for feature in clinFeature.getFeatures():
                    if cancer in TCGAUtil.featurePriority:
                        if feature in TCGAUtil.featurePriority[cancer]:
                            priority = TCGAUtil.featurePriority[cancer][
                                feature]
                            clinFeature.setFeaturePriority(feature, priority)
                            clinFeature.setFeatureVisibility(feature, "on")
                break

        #data processing
        clinMatrix = ClinicalMatrixNew(infile, J["name"], False, clinFeature)
        clinMatrix.removeCols(["ethnicity", "race", "jewish_origin"])
        # blank out placeholder values (order kept from the original calls)
        for token in ("null", "NULL", "Null", "NA",
                      "[null]", "[NULL]", "[Null]", "[NA]",
                      "[Not Available]", "[Not Reported]",
                      "[Not Applicable]", "[Not Requested]",
                      "[Completed]", "[Pending]", "Not Tested", "[]",
                      ",\"", "\"", "'", "`", "|"):
            clinMatrix.replaceValue(token, "")

        #json metadata (J is still the unmodified sidecar content)
        if crossCohort:
            J['name'] = J['name'] + "_" + originCancer

        J[":sampleMap"] = "TCGA." + cancer + ".sampleMap"
        J["cohort"] = "TCGA " + TCGAUtil.cancerHumanReadable[
            cancer] + " (" + cancer + ")"
        name = trackName_fix(J['name'])
        # this code treats a False return from trackName_fix as failure
        if name == False:
            message = "bad object name, need fix otherwise break loader, too long " + J[
                "name"]
            print(message)
            flog.write(message + "\n")
            # NOTE(review): this abandons all remaining files too -- confirm
            # that "continue" is not what was meant.
            return
        J["name"] = name

        if crossCohort and ":clinicalFeature" in J:
            J[":clinicalFeature"] = J[":clinicalFeature"] + "_" + originCancer

        J["cgDataVersion"] = 1

        #output matrix
        outfile = cancerOutDir + fileName
        if crossCohort:
            outfile = outfile + "_" + originCancer

        with open(outfile, "w") as oHandle:
            clinMatrix.store(oHandle, validation=True)
        with open(outfile + ".json", "w") as fjson:
            json.dump(J, fjson, indent=-1)

        #output clinFeature
        if clinFeature:
            outfile = cancerOutDir + clinFfile
            if crossCohort:
                outfile = outfile + "_" + originCancer
            with open(outfile, 'w') as fout:
                clinFeature.store(fout)

            clinFJ["cgDataVersion"] = 1
            with open(outfile + ".json", "w") as fjson:
                json.dump(clinFJ, fjson, indent=-1)
    return