Python DataUtilities Examples, DataUtilities Python Examples

Example #1

0

Show file

File: Main.py Project: PolymerGuy/Calibrator

    def CALIBRATE(self):
        """Fit the selected material model to the loaded data and plot it.

        The model is looked up in ``DataUtilities.materialModels`` using the
        current text of ``comboBox_4``.  The two values returned by
        ``DataUtilities.fitData`` are stored on ``self.popt`` and
        ``self.pcov``; the model evaluated over ``self.strain`` is stored on
        ``self.stressEvaluated`` and plotted.  The outcome of the fit is
        appended to ``self.statusString``.
        """
        try:
            model_name = str(self.ui.comboBox_4.currentText())
            self.materialModel = DataUtilities.materialModels[model_name]
            self.popt, self.pcov = DataUtilities.fitData(
                self.time, self.strain, self.stress, self.materialModel)
        except Exception:
            # ``Exception`` instead of a bare ``except`` so KeyboardInterrupt
            # and SystemExit are not swallowed.
            self.statusString += 'Optimizer routine failed \n'
            self.UpdateStatus()
            # Without a fresh fit there is nothing valid to evaluate or plot;
            # continuing would use stale (or missing) parameters.
            return

        self.statusString += 'Optimizer routine completed \n'

        try:
            self.stressEvaluated = DataUtilities.evaluateMaterialModel(
                self.strain, self.popt, self.materialModel)
        finally:
            # The status is refreshed even when the evaluation raises.
            self.UpdateStatus()
        # Plot only after a successful evaluation, so a plotting error can
        # never mask an evaluation error.
        self.PLOT(self.strain, self.stressEvaluated)

Example #2

0

Show file

File: DataSummary.py Project: MaciCrowell/TCGA_DataScience

def getAllSummary():
	"""Run every summary routine on the list from getListofPlatPatients()."""
	platPatients = DataUtilities.getListofPlatPatients()
	# The tuple order is the order in which the summaries are run.
	summaries = (
		getSummaryGender,
		getSummaryPrior_dx,
		getSummaryRace,
		getSummaryEthnicity,
		getSummaryPathologic_stage,
		getSummaryNumber_pack_years_smoked,
		getSummaryTobacco_smoking_history,
		getSummaryPrimary_therapy_outcome_success,
		getSummaryVital_status,
		getSummaryYears_to_Birth,
	)
	for summarize in summaries:
		summarize(platPatients)

Example #3

0

Show file

File: Main.py Project: PolymerGuy/Calibrator

    def importFile(self):
        """Ask the user for a data file, load it and plot stress vs. strain.

        The chosen path is stored on ``self.dataPath``.  On success the three
        values returned by ``DataUtilities.readData`` are stored on
        ``self.time``, ``self.strain`` and ``self.stress`` and plotted.  The
        outcome is appended to ``self.statusString`` and the status display
        is refreshed in every case.
        """
        self.fileDialog = QtGui.QFileDialog(self)
        self.dataPath = unicode(self.fileDialog.getOpenFileName())

        try:
            self.time, self.strain, self.stress = DataUtilities.readData(self.dataPath)
        except Exception:
            # ``Exception`` instead of a bare ``except`` so KeyboardInterrupt
            # and SystemExit are not swallowed.
            self.statusString += 'Read data routine failed \n'
        else:
            self.statusString += 'Read data routine completed \n'
            # Plot only freshly loaded data; after a failed read the
            # attributes may be stale or not exist at all.
            self.PLOT(self.strain, self.stress)
        finally:
            self.UpdateStatus()

Example #4

0

Show file

File: TCGAlogReg.py Project: MaciCrowell/TCGA_DataScience

def run_regression(survey, version):
	"""Runs logistic regressions.

	survey: Survey
	version: which model to run

	Returns: Regressions object
	"""
	dep, control = DataUtilities.get_version(version)

	print dep, control
	reg = survey.make_logistic_regression(dep, control)
	#print reg
	return Regressions([reg])

Example #5

0

Show file

File: TCGAlogReg.py Project: MaciCrowell/TCGA_DataScience

def test_models(version=(30,-1), resample_flag=False, patients = -1, printReg = True):
	"""Read the survey for *patients* and run the regression for *version*.

	version: model selector handed to read_complete / DataUtilities.get_version
	resample_flag: accepted but not used by this function
	patients: patient data; -1 means "load it with
		DataUtilities.getDictReadofPatientsFilled()"
	printReg: when true use run_regression_and_print, otherwise run_regression

	Returns: whatever the selected regression routine returns
	"""
	means = {'educ_from_12': 4, 'born_from_1960': 10}

	if patients == -1:
		patients = DataUtilities.getDictReadofPatientsFilled()

	# read the survey (the complete-records subsample is not used here)
	survey, _complete = read_complete(version, patients)

	print(DataUtilities.get_version(version))

	# run the models
	if not printReg:
		return run_regression(survey, version)
	return run_regression_and_print(survey, version=version, means=means)

Example #6

0

Show file

File: TCGAlogReg.py Project: MaciCrowell/TCGA_DataScience

def read_complete(version,patients):
	"""Read the survey and select the records complete for *version*.

	Returns: (survey, complete) where *complete* is the subsample of
		respondents for which is_complete() holds on the dependent variable
		plus all control variables of the chosen version.
	"""
	survey = read_survey(patients)

	dependent, controls = DataUtilities.get_version(version)
	required = [dependent] + controls

	def has_all_attrs(respondent):
		return respondent.is_complete(required)

	return survey, survey.subsample(has_all_attrs)

Example #7

0

Show file

File: DataSummary.py Project: MaciCrowell/TCGA_DataScience

def getSummaryNumber_pack_years_smoked(patientList=None):
	"""Count patients per getNumber_pack_years_smoked() value and print the breakdown.

	patientList: iterable of patient objects; when None it is loaded with
		DataUtilities.getListofPatients().

	Returns: dict mapping each observed value ("Unknown" for None) to the
		number of patients that have it.
	"""
	if patientList is None:
		patientList = DataUtilities.getListofPatients()
	dataDict = {}
	for patient in patientList:
		data = patient.getNumber_pack_years_smoked()
		if data is None:
			data = "Unknown"
		# dict.get avoids building the key list on every iteration.
		dataDict[data] = dataDict.get(data, 0) + 1
	print("Number_pack_years_smoked Breakdown")
	for key, value in dataDict.items():
		# %-formatting also copes with non-string keys (e.g. numbers).
		print("%s: %s" % (key, value))
	print("")
	return dataDict

Example #8

0

Show file

File: DataSummary.py Project: MaciCrowell/TCGA_DataScience

def getSummaryPathologic_stage(patientList=None):
	"""Count patients per getPathologic_stage() value and print the breakdown.

	patientList: iterable of patient objects; when None it is loaded with
		DataUtilities.getListofPatients().

	Returns: dict mapping each observed value ("Unknown" for None) to the
		number of patients that have it.
	"""
	if patientList is None:
		patientList = DataUtilities.getListofPatients()
	dataDict = {}
	for patient in patientList:
		data = patient.getPathologic_stage()
		if data is None:
			data = "Unknown"
		# dict.get avoids building the key list on every iteration.
		dataDict[data] = dataDict.get(data, 0) + 1
	print("Pathologic_stage Breakdown")
	for key, value in dataDict.items():
		# %-formatting also copes with non-string keys.
		print("%s: %s" % (key, value))
	print("")
	return dataDict

Example #9

0

Show file

File: DataSummary.py Project: MaciCrowell/TCGA_DataScience

def getSummaryYears_to_Birth(patientList=None):
	"""Count patients per getYears_to_birthClean() value and print the breakdown.

	patientList: iterable of patient objects; when None it is loaded with
		DataUtilities.getListofPatients().

	Returns: dict mapping each observed value ("Unknown" for None) to the
		number of patients that have it.
	"""
	if patientList is None:
		patientList = DataUtilities.getListofPatients()
	dataDict = {}
	for patient in patientList:
		data = patient.getYears_to_birthClean()
		if data is None:
			data = "Unknown"
		# dict.get avoids building the key list on every iteration.
		dataDict[data] = dataDict.get(data, 0) + 1
	print("Years_to_Birth Breakdown")
	for key, value in dataDict.items():
		# Keys may be numbers here, so format rather than concatenate.
		print("%s: %s" % (key, value))
	print("")
	return dataDict

Example #10

0

Show file

File: DataSummary.py Project: MaciCrowell/TCGA_DataScience

def getSummaryPrimary_therapy_outcome_success(patientList=None):
	"""Count patients per getPrimary_therapy_outcome_success() value and print the breakdown.

	patientList: iterable of patient objects; when None it is loaded with
		DataUtilities.getListofPatients().

	Returns: dict mapping each observed value ("Unknown" for None) to the
		number of patients that have it.
	"""
	if patientList is None:
		patientList = DataUtilities.getListofPatients()
	dataDict = {}
	for patient in patientList:
		data = patient.getPrimary_therapy_outcome_success()
		if data is None:
			data = "Unknown"
		# dict.get avoids building the key list on every iteration.
		dataDict[data] = dataDict.get(data, 0) + 1
	print("Primary_therapy_outcome_success Breakdown")
	for key, value in dataDict.items():
		# %-formatting also copes with non-string keys.
		print("%s: %s" % (key, value))
	print("")
	return dataDict

Example #11

0

Show file

File: DataSummary.py Project: MaciCrowell/TCGA_DataScience

def getSummaryTobacco_smoking_history(patientList=None):
	"""Count patients per getTobacco_smoking_history() value and print the breakdown.

	patientList: iterable of patient objects; when None it is loaded with
		DataUtilities.getListofPatients().

	Returns: dict mapping each observed value ("Unknown" for None) to the
		number of patients that have it.
	"""
	if patientList is None:
		patientList = DataUtilities.getListofPatients()
	dataDict = {}
	for patient in patientList:
		data = patient.getTobacco_smoking_history()
		if data is None:
			data = "Unknown"
		# dict.get avoids building the key list on every iteration.
		dataDict[data] = dataDict.get(data, 0) + 1
	# Heading follows the "<field> Breakdown" pattern of the sibling
	# summaries (the accessor's "get" prefix had leaked into it).
	print("Tobacco_smoking_history Breakdown")
	for key, value in dataDict.items():
		# %-formatting also copes with non-string keys.
		print("%s: %s" % (key, value))
	print("")
	return dataDict

Example #12

0

Show file

def IBOrders(d,q_data,q_msg_o,q_err_o):
	"""Order-placing loop: connects an ``IB()`` client and trades on model signals.

	d: model dictionary keyed by ticker symbol.  The last element of each
		value, ``d[ticker][-1]``, is a 4-item list that this function
		updates in place: [position flag, open price, stop price,
		trailing-stop flag].
	q_data: queue of tick records.  ``data[0]`` is the contract (its
		``.symbol`` is the ticker and it is passed to ``placeOrder``),
		``data[1]`` is logged as the tick time, ``data[2]`` as the bid,
		``data[3]`` as the ask, and ``data[4]`` is the price used for the
		open/stop bookkeeping.
	q_msg_o: queue of control messages; 'Start' connects, 'Stop' disconnects.
	q_err_o: queue that receives ``d`` when an unexpected exception ends
		the loop.

	The loop has no normal exit; it ends only through KeyboardInterrupt or
	another exception, both of which disconnect the client first.
	"""
	
	# Setup second logger-------------------------------------------------------
	# NOTE(review): handlers are added on every call, so calling this function
	# more than once in the same process would duplicate log lines - confirm
	# it is only started once per process.
	log2=logging.getLogger('IBOrdersScript')
	log2.setLevel(logging.INFO)
	
	formatter=logging.Formatter('%(asctime)s %(name)s %(levelname)s %(message)s')
	
	file_handler=logging.FileHandler('IBOrdersScript.log')
	file_handler.setFormatter(formatter)
	
	stream_handler=logging.StreamHandler()
	stream_handler.setFormatter(formatter)
	
	log2.addHandler(file_handler)
	log2.addHandler(stream_handler)
	#---------------------------------------------------------------------------
	
	ib=IB()
	
	try:
		
		# Start main process loop
		# NOTE(review): nothing in this loop blocks while both queues are
		# empty, so it appears to busy-poll - confirm this is intended.
		while True:
			
			# Check the message queue and either connect or quit
			if not q_msg_o.empty():
				
				message=q_msg_o.get() # Get the message
				
				if message=='Start':  # Start IB
				
					log2.info('Starting IB ordering process')
					_=util.logToFile('IBOrders.log',level=20,ibapiLevel=20) # Logger (20 == logging.INFO)
					_=ib.connect('127.0.0.1', 7497, 2) # Connect to IB (presumably host, port, client id - confirm)
					ib.sleep(1)
					log2.info('Connected to IB ordering process')
					
				elif message=='Stop':  # Stop IB
				
					if ib.isConnected():
						ib.disconnect()
						ib.sleep(1)
						log2.info('Controlled disconnection from IB ordering process')
					
			# While we are connected, do stuff
			if ib.isConnected():
				
				# If there is data in the queue, go through it and make trades
				# (one record is consumed per pass of the outer loop)
				if not q_data.empty():
		
					data=q_data.get()      # Get data
					ticker=data[0].symbol  # ticker
					
					# Call the model, it should know your input JSON structure
					# The result is compared below against 1 (open), -1 (close)
					# and 0 (hold / adjust stop).
					trd=model.Position(data,d[ticker])
					
					# Get model parameters, UNITS and stop percentage
					# params[0] is used as the order quantity, params[1] as the
					# stop fraction below the open price.
					params=model.Parameters(d[ticker])
					
					# If we are going to open a position, check time of day here
					if trd==1:
						
						# Open position
						log2.info('Placing market order for '+ticker+' with ticktime '+str(data[1])+' and ask '+str(data[3]))
						order=MarketOrder('BUY',params[0])
						#contract=ib.qualifyContracts(Stock(,'Smart','USD'))
						trade=ib.placeOrder(data[0],order)
						
						# Update model dynamic parameters
						d[ticker][-1][0]=1                      # Position flag
						d[ticker][-1][1]=data[4]                # Open price
						d[ticker][-1][2]=data[4]*(1-params[1])  # Stop price
						
						# Write the updated model dict to JSON for safety
						_=DataUtilities.ModelOut(d)
					
					# If we are going to sell an open position
					elif trd==-1:
						
						# Close position
						log2.info('Closing position for '+ticker+' with ticktime '+str(data[1])+' and bid '+str(data[2]))
						order=MarketOrder('SELL',params[0])
						#contract=ib.qualifyContracts(Stock(str(data[0]),'Smart','USD'))
						trade=ib.placeOrder(data[0],order)
						
						# Update model dynamic parameters
						# (reset position flag, open price, stop price and
						# trailing-stop flag)
						d[ticker][-1]=[0,0,0,0]
						
						# Write the updated model dict to JSON for safety
						_=DataUtilities.ModelOut(d)
						
					# Check if we need to adjust stop 
					elif trd==0:
						
						# Set trailing stop flag if needed
						# (only while a position is open and the flag is not set yet)
						if d[ticker][-1][0] and not d[ticker][-1][3]:
							d[ticker][-1][3]=model.SetTrail(data[4],d[ticker])
							
							# Write the updated model dict to JSON for safety
							_=DataUtilities.ModelOut(d)
							
						# Adjust stop price (trailing stop) if needed
						# (runs in the same pass when the flag was just set above)
						if d[ticker][-1][0] and d[ticker][-1][3]:
							d[ticker][-1][2]=model.NewStop(data[4],d[ticker])
							
							# Write the updated model dict to JSON for safety
							_=DataUtilities.ModelOut(d)
							
						
						
	
	except KeyboardInterrupt:
		# Manual interruption: disconnect cleanly, nothing is reported on q_err_o
		ib.disconnect()
		ib.sleep(1)
		log2.info('Controlled disconnection from IB ordering process')
	
	except Exception as e:
		# Hand the current model dict to the error queue before disconnecting
		_=q_err_o.put(d)
		ib.disconnect()
		ib.sleep(1)
		log2.error('Disconnected by exception IB ordering process')
		log2.exception(e)

Example #13

0

Show file

	formatter=logging.Formatter('%(asctime)s %(name)s %(levelname)s %(message)s')
	
	file_handler=logging.FileHandler('IBMain.log')
	file_handler.setFormatter(formatter)
	
	stream_handler=logging.StreamHandler()
	stream_handler.setFormatter(formatter)
	
	logger.addHandler(file_handler)
	logger.addHandler(stream_handler)
	#---------------------------------------------------------------------------
	
	# Set up the ticker list and model------------------------------------------
	
	# Read in the JSON and get existing positions (if you want)
	tickerdata=DataUtilities.GetTickerDataCL()
	if tickerdata==0: sys.exit()
	tlist=list(tickerdata.keys()) 
	positions=DataUtilities.GetPositionsCL()
	if positions==0: positions={}
	
	# Build the "model", it is just a dictionary where tickers are keys and 
	# values are lists of trading related parameters
	d=DataUtilities.BuildModel(positions,tickerdata)  # Model dictionary
	#---------------------------------------------------------------------------
	
	# Set up multiprocessing----------------------------------------------------
	
	# Set up the queues for data and message traffic.	
	q_data=mp.Queue()
	q_msg_f=mp.Queue()

Example #14

0

Show file

File: playingWithData.py Project: MaciCrowell/TCGA_DataScience

import glob
import xml.etree.ElementTree as ET
import thinkplot
import DataUtilities
import numpy

# Patient XML files to scan; each entry is handed to ET.parse below.
matching = DataUtilities.findPatientFiles()
# Prefix map used to resolve the "luad:" / "rx:" prefixes in the queries below.
namespaces = DataUtilities.getPatientXMLNameSpaces()

# Accumulators for the overview.
drugList = {}
patientsWithDrugs = 0   # incremented once per non-empty rx:drugs element
drugsPerPatient = []    # number of child elements of each non-empty rx:drugs element

patientWithPlat = 0

# The bare string below is a no-op expression statement acting as a section label.
"getDataOverview"
for patient in matching:
	tree = ET.parse(patient)
	rootElement = tree.getroot()
	# Becomes True once this patient has a cisplatin or carboplatin entry.
	plat = False
	for drugs in rootElement.findall("luad:patient/rx:drugs", namespaces=namespaces):
		# len() of an Element is its number of direct children.
		if len(drugs) != 0:
			drugsPerPatient.append(len(drugs))
			patientsWithDrugs += 1
			for drug in drugs:
				drugName = drug.find('rx:drug_name' , namespaces=namespaces).text
				# Skip entries whose drug name is empty or missing.
				if drugName:
					# Map the lower-cased raw name through getTrueDrugName
					# (presumably normalizes spelling variants - confirm).
					drugName = DataUtilities.getTrueDrugName(drugName.lower())
					# I think this is where FixingErrors.py should go.
					if drugName.lower() == "cisplatin" or drugName.lower() == "carboplatin":
						plat = True

Example #15

0

Show file

File: cumLogReg.py Project: MaciCrowell/TCGA_DataScience

def errorTotal():
	"""Repeat the regression 100 times on resampled patients and report the odds.

	Each round refills the patient data with
	DataUtilities.getDictReadofPatientsFilled, draws 91 patients with
	sample_wr, runs TCGAlogReg.test_models and collects the odds reported
	for every variable.  Two LaTeX tables are then printed: first the
	variables that are not in mutationDict.getMutationsGreaterThan(1), then
	those that are.  The second group is also passed to plotRegMult.
	"""
	means = {}
	version = (30,-1)
	cumDict = {}

	patientDict = DataUtilities.getDictReadofPatients()

	for _ in range(100):
		patients = DataUtilities.getDictReadofPatientsFilled(patientDict = patientDict)
		#resample
		sPatients = sample_wr(patients, 91)

		regs = TCGAlogReg.test_models(version = version, patients = sPatients, printReg = False)
		for reg in regs.regs:
			for name, odds, p in reg.report_odds(means, printCum = False):
				cumDict.setdefault(name, []).append(odds)

	# Both tables list the variables in the same order: ascending summed odds.
	ranked = sorted(cumDict.items(), key=lambda e: sum(e[1]))

	print("ErrorTotal")
	#create table
	print(r"\begin{table}[h]")
	print(r"\begin{tabular}{|l|l|l|l|}")
	print(r"\hline")
	print(r"\textbf{Type} & \textbf{Odds} & \textbf{Odds Lower} & \textbf{Odds Upper}\\ \hline")

	# Looked up once per table instead of once per row.
	mutations = mutationDict.getMutationsGreaterThan(1)
	for key, oddsList in ranked:
		if key not in mutations:
			average = sum(oddsList) / float(len(oddsList))
			lower = getLowerConfidence(oddsList)
			# Was getUpperLowerConfidence, a name used nowhere else; every
			# other upper bound in this file comes from getUpperConfidence.
			upper = getUpperConfidence(oddsList)
			print(key + " & {:.3f} & {:.3f} & {:.3f}".format(average, lower, upper) + r"\\ \hline")

	print(r"\end{tabular}")
	print(r"\end{table}")

	print("")
	# NOTE(review): this header declares four columns, but the rows below
	# emit three (the bounds are merged into one "(lower, upper)" cell).
	print(r"\begin{table}[h]")
	print(r"\begin{tabular}{|l|l|l|l|}")
	print(r"\hline")
	print(r"\textbf{Type} & \textbf{Odds} & \textbf{Odds Lower} & \textbf{Odds Upper}\\ \hline")

	names = []
	averages = []
	lowers = []
	uppers = []
	mutations = mutationDict.getMutationsGreaterThan(1)
	for key, oddsList in ranked:
		if key in mutations:
			average = sum(oddsList) / float(len(oddsList))
			lower = getLowerConfidence(oddsList)
			upper = getUpperConfidence(oddsList)
			print(key + " & {:.3f} & ({:.3f}, {:.3f})".format(average, lower, upper) + r"\\ \hline")
			names.append(key)
			averages.append(average)
			lowers.append(lower)
			uppers.append(upper)

	# Raw strings: "\e" and "\c" are not valid escape sequences.
	print(r"\end{tabular}")
	print(r"\caption{The above table shows the effect each mutation has when tested in conjunction with the other mutations}")
	print(r"\end{table}")
	plotRegMult(names, averages, lowers, uppers, "Mutation Log Odds With Total Error","Mutations", "Log Odds")

Example #16

0

Show file

File: cumLogReg.py Project: MaciCrowell/TCGA_DataScience

def errorMissing():
	"""Repeat the regression 100 times on refilled patient data and report the odds.

	Each round refills the patient data with
	DataUtilities.getDictReadofPatientsFilled (no resampling), runs
	TCGAlogReg.test_models and collects the odds reported for every
	variable.  Two LaTeX tables are printed: first the variables that are
	not in mutationDict.getMutationsGreaterThan(1), then those that are.
	Each group is plotted with plotRegMult, followed by a combined plot.
	"""
	means = {}
	version = (30,-1)
	cumDict = {}

	patientDict = DataUtilities.getDictReadofPatients()

	for _ in range(100):
		patients = DataUtilities.getDictReadofPatientsFilled(patientDict = patientDict)
		regs = TCGAlogReg.test_models(version = version, patients = patients, printReg = False)
		for reg in regs.regs:
			for name, odds, p in reg.report_odds(means, printCum = False):
				cumDict.setdefault(name, []).append(odds)

	# Both tables list the variables in the same order: ascending summed odds.
	ranked = sorted(cumDict.items(), key=lambda e: sum(e[1]))

	print("ErrorMissing")
	#create table
	print(r"\begin{table}[h]")
	print(r"\begin{tabular}{|l|l|l|}")
	print(r"\hline")
	print(r"\textbf{Type} & \textbf{Odds} & \textbf{Confidence Interval}\\ \hline")

	namesL = []
	averagesL = []
	lowersL = []
	uppersL = []
	# Looked up once per table instead of once per row.
	mutations = mutationDict.getMutationsGreaterThan(1)
	for key, oddsList in ranked:
		if key not in mutations:
			average = sum(oddsList) / float(len(oddsList))
			lower = getLowerConfidence(oddsList)
			upper = getUpperConfidence(oddsList)
			print(key + " & {:.3f} & ({:.3f}, {:.3f})".format(average, lower, upper) + r"\\ \hline")
			namesL.append(key)
			averagesL.append(average)
			lowersL.append(lower)
			uppersL.append(upper)
	# Raw strings: "\e" and "\c" are not valid escape sequences.
	print(r"\end{tabular}")
	print(r"\end{table}")

	# NOTE(review): the plot titles say "Sampling Error" while the caption
	# below speaks of error due to missing data - confirm the wording.
	plotRegMult(namesL, averagesL, lowersL, uppersL, "Life Factor Log Odds With Sampling Error","Factor", "Log Odds")

	print("")
	print(r"\begin{table}[h]")
	print(r"\begin{tabular}{|l|l|l|}")
	print(r"\hline")
	print(r"\textbf{Type} & \textbf{Odds} & \textbf{Confidence Interval}\\ \hline")

	namesM = []
	averagesM = []
	lowersM = []
	uppersM = []
	mutations = mutationDict.getMutationsGreaterThan(1)
	for key, oddsList in ranked:
		if key in mutations:
			average = sum(oddsList) / float(len(oddsList))
			lower = getLowerConfidence(oddsList)
			upper = getUpperConfidence(oddsList)
			print(key + " & {:.3f} & ({:.3f}, {:.3f})".format(average, lower, upper) + r"\\ \hline")
			namesM.append(key)
			averagesM.append(average)
			lowersM.append(lower)
			uppersM.append(upper)
	print(r"\end{tabular}")
	print(r"\caption{The above table shows the effect each mutation has when tested independently the other mutations. The confidence intervals are shown for the error due to missing data.}")
	print(r"\end{table}")
	plotRegMult(namesM, averagesM, lowersM, uppersM, "Mutation Log Odds With Sampling Error","Mutation", "Log Odds")
	plotRegMult(namesL + namesM, averagesL + averagesM, lowersL + lowersM, uppersL + uppersM, "Log Odds With Sampling Error","Factor", "Log Odds")