def main():
    """Print the phenotype label of every sample found in the clinical table.

    Reads the genomic table named by ``--genomicData`` and the clinical
    table named by ``--clinicalData``.  Every entry of the first genomic row
    that also occurs in column 0 of the clinical table is looked up, its
    sample type is collapsed to ``Normal``, ``Primary_Tumor`` or ``NA``, and
    three tab-separated lines are printed to stdout:

    1. the number of matched samples, the number of distinct labels and ``1``;
    2. a ``#`` line listing the fixed class names;
    3. one label per matched sample, in genomic header order.

    ``--outputData`` is accepted for command-line compatibility but is not
    used here.  Rows of the clinical table with fewer than 150 columns raise
    ``IndexError``.
    """
    # The original description/help text was copied from a plate-transposing
    # script and contradicted what this function does.
    parser = argparse.ArgumentParser(
        description="""Reads a genomic data file and a clinical metadata file and prints the phenotype label (Primary_Tumor, Normal or NA) of each sample.""")
    parser.add_argument(
        '-i', '--genomicData', required=True,
        help='The genomic data file. Its first row must hold the patient identifiers')
    parser.add_argument('-c', '--clinicalData', required=True,
                        help='The file that contains the metadata')
    parser.add_argument('-o', '--outputData', required=False,
                        help='The file you get at the end')
    args = parser.parse_args()

    # readAfile presumably returns a list of rows, each a list of strings
    # -- TODO confirm against aUsefulFunctionsFiltering.
    rawTable = aUsefulFunctionsFiltering.readAfile(args.genomicData)
    # The first clinical row is skipped (presumably a column header).
    diseaseCode = aUsefulFunctionsFiltering.readAfile(args.clinicalData)[1:]

    # Patient identifier (column 0) -> tumour/normal sample type (column 149).
    # NOTE(review): the column index is hard-coded; an earlier revision used
    # column 97 for breast cancer data, so it likely differs per data set.
    diseaseDict = {entry[0]: entry[149] for entry in diseaseCode}

    # The first row of the genomic file holds the patient identifiers.
    header = rawTable[0]

    # Test membership on the dict itself: ``.keys()`` builds a list in
    # Python 2, which made every lookup a linear scan.
    phenotype = [diseaseDict[patient] for patient in header
                 if patient in diseaseDict]

    # Collapse the labels to single words; anything unrecognised becomes NA.
    relabel = {'Solid Tissue Normal': 'Normal',
               'Primary Tumor': 'Primary_Tumor'}
    fixedPhenotype = [relabel.get(case, 'NA') for case in phenotype]

    sampleNo = len(phenotype)
    classNo = len(set(fixedPhenotype))

    print(str(sampleNo) + '\t' + str(classNo) + '\t1')
    # The class names are emitted in a fixed order rather than from the set.
    # NOTE(review): the line contains two consecutive tabs after '#', and
    # three names are always listed even when classNo is smaller -- confirm
    # the consumer of this output accepts both.
    print('#\t\tPrimary_Tumor\tNormal\tNA')
    print('\t'.join(fixedPhenotype))
def main():
    """Print the phenotype label of every sample found in the clinical table.

    Reads the genomic table named by ``--genomicData`` and the clinical
    table named by ``--clinicalData``.  Every entry of the first genomic row
    that also occurs in column 0 of the clinical table is looked up, its
    sample type is collapsed to ``Normal``, ``Primary_Tumor`` or ``NA``, and
    three tab-separated lines are printed to stdout:

    1. the number of matched samples, the number of distinct labels and ``1``;
    2. a ``#`` line listing the fixed class names;
    3. one label per matched sample, in genomic header order.

    ``--outputData`` is accepted for command-line compatibility but is not
    used here.  Rows of the clinical table with fewer than 150 columns raise
    ``IndexError``.
    """
    # The original description/help text was copied from a plate-transposing
    # script and contradicted what this function does.
    parser = argparse.ArgumentParser(
        description="""Reads a genomic data file and a clinical metadata file and prints the phenotype label (Primary_Tumor, Normal or NA) of each sample.""")
    parser.add_argument(
        '-i', '--genomicData', required=True,
        help='The genomic data file. Its first row must hold the patient identifiers')
    parser.add_argument('-c', '--clinicalData', required=True,
                        help='The file that contains the metadata')
    parser.add_argument('-o', '--outputData', required=False,
                        help='The file you get at the end')
    args = parser.parse_args()

    # readAfile presumably returns a list of rows, each a list of strings
    # -- TODO confirm against aUsefulFunctionsFiltering.
    rawTable = aUsefulFunctionsFiltering.readAfile(args.genomicData)
    # The first clinical row is skipped (presumably a column header).
    diseaseCode = aUsefulFunctionsFiltering.readAfile(args.clinicalData)[1:]

    # Patient identifier (column 0) -> tumour/normal sample type (column 149).
    # NOTE(review): the column index is hard-coded and likely data-set specific.
    diseaseDict = {entry[0]: entry[149] for entry in diseaseCode}

    # The first row of the genomic file holds the patient identifiers.
    header = rawTable[0]

    # Test membership on the dict itself: ``.keys()`` builds a list in
    # Python 2, which made every lookup a linear scan.
    phenotype = [diseaseDict[patient] for patient in header
                 if patient in diseaseDict]

    # Collapse the labels to single words.  The previous loop only rebound
    # its loop variable, so the relabelled values were discarded and
    # fixedPhenotype stayed empty; build the list directly instead.
    # Unrecognised sample types become NA.
    relabel = {'Solid Tissue Normal': 'Normal',
               'Primary Tumor': 'Primary_Tumor'}
    fixedPhenotype = [relabel.get(case, 'NA') for case in phenotype]

    sampleNo = len(phenotype)
    classNo = len(set(fixedPhenotype))

    # Emit the result; without this the function computed labels and then
    # silently dropped them.
    # NOTE(review): the '#' line carries two consecutive tabs and always
    # lists three class names regardless of classNo -- confirm the consumer
    # of this output accepts both.
    print(str(sampleNo) + '\t' + str(classNo) + '\t1')
    print('#\t\tPrimary_Tumor\tNormal\tNA')
    print('\t'.join(fixedPhenotype))
def main():
    """Convert an expression matrix or a label list and print the result.

    ``--typeOfFile gem`` passes the input table to ``convertGEMtoGCT`` and
    prints its first two entries verbatim followed by the remaining entries
    joined with tabs.  ``--typeOfFile phenotype`` passes the table to
    ``convertLabelstoCLS`` and prints every entry joined with tabs.  Any
    other value prints a complaint.  ``--outputData`` is accepted but unused.
    """
    cli = argparse.ArgumentParser(
        description="Reads an input file that is either a gene expression "
                    "matrix or a list of phenotype labels and returns a file "
                    "suitable for analysis with GSEA")
    cli.add_argument('-i', '--inputData', required=True,
                     help='The file containing the data to be converted')
    cli.add_argument('-o', '--outputData', required=False,
                     help='The file you get at the end')
    cli.add_argument('-t', '--typeOfFile', required=True,
                     help='Either gem for gene expression matrix or '
                          'phenotype for phenotype list')
    args = cli.parse_args()

    table = aUsefulFunctionsFiltering.readAfile(args.inputData)
    kind = args.typeOfFile

    # Guard clauses instead of an if/elif/else ladder.
    if kind == 'phenotype':
        for row in convertLabelstoCLS(table):
            print('\t'.join(row))
        return

    if kind != 'gem':
        print('You have specified the wrong type of argument to type of file')
        return

    converted = convertGEMtoGCT(table)
    # The two leading entries are printed as they are because joining them
    # with tabs would spoil their formatting.
    for preamble in converted[0:2]:
        print(preamble)
    for row in converted[2:]:
        print('\t'.join(row))
def parsePicogreenOutput384(picoGreenOutput):
    """Extract the plate readings from a Picogreen assay text export.

    Args:
        picoGreenOutput: path of the exported file.  A warning is printed
            when the name does not end in ``.txt``; parsing is attempted
            regardless.

    Returns:
        A list of rows (rows 21-36 of the file, columns 2-24 of each row)
        in which every ``,`` has been replaced by ``.``.  The same list is
        also printed to stdout.
    """
    if not picoGreenOutput.endswith('.txt'):
        print('Your input file is not in tab format. It is probably in .xls and you should convert to .tab first')

    # Read the file named by the parameter.  The previous code read
    # ``args.inputFile``, a name that is not defined in this function, so the
    # argument was ignored and the call failed unless a global ``args``
    # happened to exist.
    dat = aUsefulFunctionsFiltering.readAfile(picoGreenOutput)

    # Keep only the rows that hold the plate block.
    dat = dat[21:37]

    # Trim the leading and trailing columns around the readings.
    # NOTE(review): this slice keeps at most 23 columns per row while a
    # 384-well plate has 24 -- confirm the bounds against a real export.
    plateMap = [row[2:25] for row in dat]

    # Replace decimal commas with decimal points.
    noCommas = [[cell.replace(",", ".") for cell in row] for row in plateMap]

    print(noCommas)
    return noCommas
def main():
    """Flatten a 384-well plate map into ``well<TAB>value`` lines.

    The input table's first row and the first cell of every remaining row
    are discarded; the remaining cells are flattened row by row and paired
    with the well names A1..A24, B1..B24, ... P24.  Pairing stops at the
    shorter of the two sequences.  Every pair is printed to stdout, and is
    additionally written to ``--outputData`` when that option is given.
    """
    parser = argparse.ArgumentParser(
        description="""Reads an input file that is a 384 well plate and transposes it to yield the column letter.""")
    parser.add_argument('-i', '--inputData', required=True,
                        help='''The file containing elements you want to change.''')
    parser.add_argument('-o', '--outputData', required=False,
                        help='The file you get at the end')
    args = parser.parse_args()

    plateMap = aUsefulFunctionsFiltering.readAfile(args.inputData)

    # Drop the header row, then the row-name cell of every remaining row.
    cells = [row[1:] for row in plateMap[1:]]

    # Flatten the list of lists into one row-major sequence.
    plateMap2 = list(itertools.chain.from_iterable(cells))

    # 16 row letters x 24 column numbers, in the same row-major order.
    wells = [letter + str(num)
             for letter in string.ascii_uppercase[:16]
             for num in range(1, 25)]

    pairs = list(zip(wells, plateMap2))

    # ``args.outputData`` is a path string (or None), so the previous
    # ``== True`` comparison never held and no file was ever produced.  Test
    # for truthiness, write one pair per row, and close the file reliably.
    if args.outputData:
        with open(args.outputData, 'w') as handle:
            # '\n' terminator keeps the file identical to the stdout output.
            writer = csv.writer(handle, delimiter="\t", lineterminator="\n")
            writer.writerows(pairs)

    # stdout output is kept unconditionally for existing pipelines.
    for well, gene in pairs:
        print(well + '\t' + gene)
def main():
    """Flatten a 384-well plate map into ``well<TAB>gene`` lines.

    The input table's first row and the first cell of every remaining row
    are discarded; the remaining cells are flattened row by row and paired
    with the well names A1..A24, B1..B24, ... P24.  Pairing stops at the
    shorter of the two sequences.  Every pair is printed to stdout, and is
    additionally written to ``--outputData`` when that option is given.
    """
    parser = argparse.ArgumentParser(
        description="""Reads an input file that is a 384 well plate and transposes it to yield the column letter and the name of the gene.""")
    parser.add_argument('-i', '--inputData', required=True,
                        help='The file containing elements you want to change')
    parser.add_argument('-o', '--outputData', required=False,
                        help='The file you get at the end')
    args = parser.parse_args()

    plateMap = aUsefulFunctionsFiltering.readAfile(args.inputData)

    # Drop the header row, then the row name (first entry) of each row.
    # Slicing replaces the in-place ``del`` so the table is not mutated.
    geneRows = [row[1:] for row in plateMap[1:]]

    # Flatten the list of lists into one row-major sequence.
    plateMap2 = list(itertools.chain.from_iterable(geneRows))

    # 16 row letters x 24 column numbers, in the same row-major order.
    wells = [letter + str(num)
             for letter in string.ascii_uppercase[:16]
             for num in range(1, 25)]

    pairs = list(zip(wells, plateMap2))

    # ``args.outputData`` is a path string (or None), so the previous
    # ``== True`` comparison never held and no file was ever produced.  Test
    # for truthiness, write one pair per row, and close the file reliably.
    if args.outputData:
        with open(args.outputData, 'w') as handle:
            # '\n' terminator keeps the file identical to the stdout output.
            writer = csv.writer(handle, delimiter="\t", lineterminator="\n")
            writer.writerows(pairs)

    # stdout output is kept unconditionally for existing pipelines.
    for well, gene in pairs:
        print(well + '\t' + gene)
def main():
    """Copy files of the current directory to barcode-prefixed names.

    Column 0 of the input table is treated as a barcode and column 1 as the
    text to prepend.  For every barcode, each entry of the current directory
    whose name contains the barcode is copied to ``<text>_<old name>``; the
    old and new names are printed for each copy.  A name containing several
    barcodes is copied once per barcode.

    NOTE(review): ``args`` is not defined in this function; it is presumably
    a module-level result of argument parsing -- confirm.
    """
    # Imported locally so this function does not depend on a new
    # module-level import.
    import subprocess

    # Read in files from the command line
    data = aUsefulFunctionsFiltering.readAfile(args.inputData)

    # Snapshot of the directory taken once, so copies made below are not
    # themselves picked up and copied again.
    files = os.listdir('.')

    # Barcode -> new filename prefix.
    dic = {row[0]: row[1] for row in data}

    for barcode, prefix in dic.items():
        for f in files:
            if barcode not in f:
                continue
            newName = prefix + '_' + f
            print("old file name = {0} \t new file name = {1} \n".format(f, newName))
            # Pass the names as an argument list instead of interpolating
            # them into a shell string: names with spaces or shell
            # metacharacters were split or executed by the shell before.
            # '--' stops cp from reading a leading '-' as an option.  As
            # before, a failing copy does not abort the run.
            subprocess.call(['cp', '--', f, newName])
def main():
    """Command-line entry point: print converted GSEA input to stdout.

    With ``-t gem`` the table read from ``-i`` goes through
    ``convertGEMtoGCT``; its first two entries are printed unchanged and the
    rest are tab-joined.  With ``-t phenotype`` it goes through
    ``convertLabelstoCLS`` and every entry is tab-joined.  Other ``-t``
    values only produce a message.  ``-o`` is parsed but not used.
    """
    argParser = argparse.ArgumentParser(
        description="Reads an input file that is either a gene expression "
                    "matrix or a list of phenotype labels and returns a file "
                    "suitable for analysis with GSEA")
    argParser.add_argument(
        '-i', '--inputData', required=True,
        help='The file containing the data to be converted')
    argParser.add_argument(
        '-o', '--outputData', required=False,
        help='The file you get at the end')
    argParser.add_argument(
        '-t', '--typeOfFile', required=True,
        help='Either gem for gene expression matrix or phenotype for '
             'phenotype list')
    args = argParser.parse_args()

    rows = aUsefulFunctionsFiltering.readAfile(args.inputData)
    isMatrix = args.typeOfFile == 'gem'
    isLabels = args.typeOfFile == 'phenotype'

    if not (isMatrix or isLabels):
        print('You have specified the wrong type of argument to type of file')
    elif isLabels:
        for entry in convertLabelstoCLS(rows):
            print('\t'.join(entry))
    else:
        gct = convertGEMtoGCT(rows)
        # Tabs would mangle the two leading entries, so they go out as-is.
        leading, body = gct[0:2], gct[2:]
        for entry in leading:
            print(entry)
        for entry in body:
            print('\t'.join(entry))
def main():
    """Copy files of the current directory to barcode-prefixed names.

    Column 0 of the input table is treated as a barcode and column 1 as the
    text to prepend.  For every barcode, each entry of the current directory
    whose name contains the barcode is copied to ``<text>_<old name>``; the
    old and new names are printed for each copy.  A name containing several
    barcodes is copied once per barcode.

    NOTE(review): ``args`` is not defined in this function; it is presumably
    a module-level result of argument parsing -- confirm.
    """
    # Imported locally so this function does not depend on a new
    # module-level import.
    import subprocess

    # Read in files from the command line
    data = aUsefulFunctionsFiltering.readAfile(args.inputData)

    # Snapshot of the directory taken once, so copies made below are not
    # themselves picked up and copied again.
    files = os.listdir('.')

    # Barcode -> new filename prefix.
    dic = {row[0]: row[1] for row in data}

    for barcode, prefix in dic.items():
        for f in files:
            if barcode not in f:
                continue
            newName = prefix + '_' + f
            print("old file name = {0} \t new file name = {1} \n".format(
                f, newName))
            # Pass the names as an argument list instead of interpolating
            # them into a shell string: names with spaces or shell
            # metacharacters were split or executed by the shell before.
            # '--' stops cp from reading a leading '-' as an option.  As
            # before, a failing copy does not abort the run.
            subprocess.call(['cp', '--', f, newName])
def main():
    """Bind triplicate wells of a linearised 96-well plate into columns.

    Two replicate structures are built, one list of three well names per
    plate column 1-12: rows B, C, D (``rep1``) and rows E, F, G (``rep2``),
    in lower case for ``invasion`` data and upper case for ``fluoro`` data.
    With ``--replication 3`` the value in column 1 of every data row is
    appended to the group that contains the row's well name (column 0, with
    a trailing ``.tif`` removed).

    Output is a header line followed by the first four ``rep1`` groups for
    ``invasion`` data, or by all ``rep2`` groups for ``fluoro`` data, printed
    tab-separated to stdout and also written to ``--outputData`` when given.
    An unknown ``--fileType`` prints a message and stops.
    """
    parser = argparse.ArgumentParser(
        description="""Reads an input file that is a linear representation of a 96 well plate and extracts the replicate level and binds them in columns""")
    parser.add_argument(
        '-i', '--inputData', required=True,
        help='''The file that is the linear data from a 96 well experiment''')
    parser.add_argument(
        '-r', '--replication', required=True,
        help='''The level of replication of the experiment ie 2 or 3 replicates. CURRENTLY WORKS FOR TRIPLICATE''')
    parser.add_argument(
        '-f', '--fileType', required=True,
        help='''Whether this is a fluoro plate reader file (no headers) or a invasion assay data. Options = fluoro or invasion''')
    parser.add_argument('-o', '--outputData', required=False,
                        help='The file you get at the end')
    args = parser.parse_args()

    data = aUsefulFunctionsFiltering.readAfile(args.inputData)
    if args.fileType == 'invasion':
        # Invasion assay files carry a header row; fluoro files do not.
        data = data[1:]
        letters1, letters2 = ['b', 'c', 'd'], ['e', 'f', 'g']
    elif args.fileType == 'fluoro':
        letters1, letters2 = ['B', 'C', 'D'], ['E', 'F', 'G']
    else:
        print('This file type is not a valid invasion assay or data from a plate reader')
        # Stop here: nothing is ever printed for an unknown file type, so
        # carrying on only produced a header with no rows.
        return

    # One group of three vertically adjacent wells per plate column.
    rep1 = [[letter + str(col) for letter in letters1] for col in range(1, 13)]
    rep2 = [[letter + str(col) for letter in letters2] for col in range(1, 13)]

    header = ['well1', 'well2', 'well3', 'rep1', 'rep2', 'rep3']
    print('\t'.join(header))

    if args.replication == '3':
        # Well name -> its group, so each data row is placed with one lookup.
        groupByWell = {well: group for group in rep1 + rep2 for well in group}
        for row in data:
            wellName = row[0]
            # Strip the file extension from the invasion well names.
            if wellName.endswith('.tif'):
                wellName = wellName[:-4]
            group = groupByWell.get(wellName)
            if group is not None:
                group.append(row[1])
    elif args.replication == '2':
        print("Duplicates ain't implemented yet")
    else:
        print("Invalid level of replication")

    # Only part of the structure is reported, depending on the file type.
    # NOTE(review): the 4-group cut for invasion data and the choice of
    # rep2 for fluoro data are carried over unchanged -- confirm intent.
    if args.fileType == 'invasion':
        outputRows = rep1[0:4]
    else:
        outputRows = rep2

    # ``args.outputData`` is a path string (or None); the previous
    # ``== True`` test never held, and its body referenced names that do
    # not exist in this function.  Write the lines that are printed below.
    if args.outputData:
        with open(args.outputData, 'w') as handle:
            writer = csv.writer(handle, delimiter="\t", lineterminator="\n")
            writer.writerow(header)
            writer.writerows(outputRows)

    for line in outputRows:
        print('\t'.join(line))
def main():
    """Bind triplicate wells of a linearised 96-well plate into columns.

    The plate is divided into 32 groups of three horizontally adjacent
    wells (a1-a3, a4-a6, ... h10-h12), named in lower case for ``invasion``
    data and upper case for ``fluoro`` data.  With ``--replication 3`` the
    value in column 1 of every data row is appended to the group containing
    the row's well name (column 0, with a trailing ``.tif`` removed).

    A header line and every group are printed tab-separated to stdout and
    also written to ``--outputData`` when given.  An unknown ``--fileType``
    prints its messages and stops.
    """
    parser = argparse.ArgumentParser(
        description="""Reads an input file that is a linear representation of a 96 well plate and extracts the replicate level and binds them in columns""")
    parser.add_argument(
        '-i', '--inputData', required=True,
        help='''The file that is the linear data from a 96 well experiment. This file is the output of transposeLiner96well.py''')
    parser.add_argument(
        '-r', '--replication', required=True,
        help='''The level of replication of the experiment ie 2 or 3 replicates. CURRENTLY WORKS FOR TRIPLICATE''')
    parser.add_argument(
        '-f', '--fileType', required=True,
        help='''Whether this is a fluoro plate reader file (no headers) or a invasion assay data. Options = fluoro or invasion''')
    parser.add_argument('-o', '--outputData', required=False,
                        help='The file you get at the end')
    args = parser.parse_args()

    data = aUsefulFunctionsFiltering.readAfile(args.inputData)
    if args.fileType == 'invasion':
        # Invasion assay files carry a header row; fluoro files do not.
        data = data[1:]
        letters = string.ascii_lowercase[:8]
    elif args.fileType == 'fluoro':
        letters = string.ascii_uppercase[:8]
    else:
        print('This file type is not a valid invasion assay or data from a plate reader')
        print('You have entered an invalid filetype')
        # Previously execution continued and crashed with NameError because
        # no row letters had been chosen.
        return

    header = ['well1', 'well2', 'well3', 'rep1', 'rep2', 'rep3']
    print('\t'.join(header))

    # Generate the replicate groups instead of spelling out all 32 by hand,
    # directly in the case that matches the file type.  Order is unchanged:
    # four groups per plate row, rows a..h.  This also avoids the Python 2
    # only ``string.upper`` and the per-row lookup swap.
    replicateLookup = [[letter + str(first + offset) for offset in range(3)]
                       for letter in letters
                       for first in (1, 4, 7, 10)]

    if args.replication == '3':
        # Well name -> its group, so each data row is placed with one lookup.
        groupByWell = {well: group
                       for group in replicateLookup for well in group}
        for row in data:
            wellName = row[0]
            # Strip the file extension from the invasion well names.
            if wellName.endswith('.tif'):
                wellName = wellName[:-4]
            group = groupByWell.get(wellName)
            if group is not None:
                group.append(row[1])
    elif args.replication == '2':
        print("Duplicates ain't implemented yet")
    else:
        print("Invalid level of replication")

    # ``args.outputData`` is a path string (or None); the previous
    # ``== True`` test never held, and its body referenced a name that does
    # not exist in this function.  Write the lines that are printed below.
    if args.outputData:
        with open(args.outputData, 'w') as handle:
            writer = csv.writer(handle, delimiter="\t", lineterminator="\n")
            writer.writerow(header)
            writer.writerows(replicateLookup)

    for line in replicateLookup:
        print('\t'.join(line))
def main():
    """Bind triplicate wells of a linearised 96-well plate into columns.

    The plate is divided into 32 groups of three horizontally adjacent
    wells (a1-a3, a4-a6, ... h10-h12), named in lower case for ``invasion``
    data and upper case for ``fluoro`` data.  With ``--replication 3`` the
    value in column 1 of every data row is appended to the group containing
    the row's well name (column 0, with a trailing ``.tif`` removed).

    A header line and every group are printed tab-separated to stdout and
    also written to ``--outputData`` when given.  An unknown ``--fileType``
    prints its messages and stops.
    """
    parser = argparse.ArgumentParser(
        description="""Reads an input file that is a linear representation of a 96 well plate and extracts the replicate level and binds them in columns""")
    parser.add_argument(
        '-i', '--inputData', required=True,
        help='''The file that is the linear data from a 96 well experiment. This file is the output of transposeLiner96well.py''')
    parser.add_argument(
        '-r', '--replication', required=True,
        help='''The level of replication of the experiment ie 2 or 3 replicates. CURRENTLY WORKS FOR TRIPLICATE''')
    parser.add_argument(
        '-f', '--fileType', required=True,
        help='''Whether this is a fluoro plate reader file (no headers) or a invasion assay data. Options = fluoro or invasion''')
    parser.add_argument('-o', '--outputData', required=False,
                        help='The file you get at the end')
    args = parser.parse_args()

    data = aUsefulFunctionsFiltering.readAfile(args.inputData)
    if args.fileType == 'invasion':
        # Invasion assay files carry a header row; fluoro files do not.
        data = data[1:]
        letters = string.ascii_lowercase[:8]
    elif args.fileType == 'fluoro':
        letters = string.ascii_uppercase[:8]
    else:
        print('This file type is not a valid invasion assay or data from a plate reader')
        print('You have entered an invalid filetype')
        # Previously execution continued and crashed with NameError because
        # no row letters had been chosen.
        return

    header = ['well1', 'well2', 'well3', 'rep1', 'rep2', 'rep3']
    print('\t'.join(header))

    # Generate the replicate groups instead of spelling out all 32 by hand,
    # directly in the case that matches the file type.  Order is unchanged:
    # four groups per plate row, rows a..h.  This also avoids the Python 2
    # only ``string.upper`` and the per-row lookup swap.
    replicateLookup = [[letter + str(first + offset) for offset in range(3)]
                       for letter in letters
                       for first in (1, 4, 7, 10)]

    if args.replication == '3':
        # Well name -> its group, so each data row is placed with one lookup.
        groupByWell = {well: group
                       for group in replicateLookup for well in group}
        for row in data:
            wellName = row[0]
            # Strip the file extension from the invasion well names.
            if wellName.endswith('.tif'):
                wellName = wellName[:-4]
            group = groupByWell.get(wellName)
            if group is not None:
                group.append(row[1])
    elif args.replication == '2':
        print("Duplicates ain't implemented yet")
    else:
        print("Invalid level of replication")

    # ``args.outputData`` is a path string (or None); the previous
    # ``== True`` test never held, and its body referenced a name that does
    # not exist in this function.  Write the lines that are printed below.
    if args.outputData:
        with open(args.outputData, 'w') as handle:
            writer = csv.writer(handle, delimiter="\t", lineterminator="\n")
            writer.writerow(header)
            writer.writerows(replicateLookup)

    for line in replicateLookup:
        print('\t'.join(line))
def main():
    """Bind triplicate wells of a linearised 96-well plate into columns.

    Two replicate structures are built, one list of three well names per
    plate column 1-12: rows B, C, D (``rep1``) and rows E, F, G (``rep2``),
    in lower case for ``invasion`` data and upper case for ``fluoro`` data.
    With ``--replication 3`` the value in column 1 of every data row is
    appended to the group that contains the row's well name (column 0, with
    a trailing ``.tif`` removed).

    Output is a header line followed by the first four ``rep1`` groups for
    ``invasion`` data, or by all ``rep2`` groups for ``fluoro`` data, printed
    tab-separated to stdout and also written to ``--outputData`` when given.
    An unknown ``--fileType`` prints a message and stops.
    """
    parser = argparse.ArgumentParser(
        description="""Reads an input file that is a linear representation of a 96 well plate and extracts the replicate level and binds them in columns""")
    parser.add_argument(
        '-i', '--inputData', required=True,
        help='''The file that is the linear data from a 96 well experiment''')
    parser.add_argument(
        '-r', '--replication', required=True,
        help='''The level of replication of the experiment ie 2 or 3 replicates. CURRENTLY WORKS FOR TRIPLICATE''')
    parser.add_argument(
        '-f', '--fileType', required=True,
        help='''Whether this is a fluoro plate reader file (no headers) or a invasion assay data. Options = fluoro or invasion''')
    parser.add_argument('-o', '--outputData', required=False,
                        help='The file you get at the end')
    args = parser.parse_args()

    data = aUsefulFunctionsFiltering.readAfile(args.inputData)
    if args.fileType == 'invasion':
        # Invasion assay files carry a header row; fluoro files do not.
        data = data[1:]
        letters1, letters2 = ['b', 'c', 'd'], ['e', 'f', 'g']
    elif args.fileType == 'fluoro':
        letters1, letters2 = ['B', 'C', 'D'], ['E', 'F', 'G']
    else:
        print('This file type is not a valid invasion assay or data from a plate reader')
        # Stop here: nothing is ever printed for an unknown file type, so
        # carrying on only produced a header with no rows.
        return

    # One group of three vertically adjacent wells per plate column.
    rep1 = [[letter + str(col) for letter in letters1] for col in range(1, 13)]
    rep2 = [[letter + str(col) for letter in letters2] for col in range(1, 13)]

    header = ['well1', 'well2', 'well3', 'rep1', 'rep2', 'rep3']
    print('\t'.join(header))

    if args.replication == '3':
        # Well name -> its group, so each data row is placed with one lookup.
        groupByWell = {well: group for group in rep1 + rep2 for well in group}
        for row in data:
            wellName = row[0]
            # Strip the file extension from the invasion well names.
            if wellName.endswith('.tif'):
                wellName = wellName[:-4]
            group = groupByWell.get(wellName)
            if group is not None:
                group.append(row[1])
    elif args.replication == '2':
        print("Duplicates ain't implemented yet")
    else:
        print("Invalid level of replication")

    # Only part of the structure is reported, depending on the file type.
    # NOTE(review): the 4-group cut for invasion data and the choice of
    # rep2 for fluoro data are carried over unchanged -- confirm intent.
    if args.fileType == 'invasion':
        outputRows = rep1[0:4]
    else:
        outputRows = rep2

    # ``args.outputData`` is a path string (or None); the previous
    # ``== True`` test never held, and its body referenced names that do
    # not exist in this function.  Write the lines that are printed below.
    if args.outputData:
        with open(args.outputData, 'w') as handle:
            writer = csv.writer(handle, delimiter="\t", lineterminator="\n")
            writer.writerow(header)
            writer.writerows(outputRows)

    for line in outputRows:
        print('\t'.join(line))