def create_relationship(old_dict_list):  # receives the list of dictionaries from t_atividade
    """
    Builds the relationship between 't_atividade' and 'custeio'. Each
    relationship row must carry the IDs of both sides as well as the value
    associated with that relationship.
    :param old_dict_list:
    :return:
    """
    custeio_dict_list = readcsv("csv_test/custeio.csv")  # list of dictionaries from custeio
    relationship_dict_list = []  # list of dictionaries that will form the relationship .csv
    for atividade_dict in old_dict_list:  # iterate over each dictionary in the activities list
        # DEBUG
        # print(atividade_dict['nome'])
        for custeio_dict in custeio_dict_list:  # iterate over each dictionary in the custeio list
            """
            Implementation plan:
            1. Build the relationship between every element of t_atividade and custeio ( X )
            2. Drop the relationships that have no value ( X )
            3. Turn the remaining data into a list of dictionaries to be written to the .csv ( )
                3.1. Name the cells of the first row of the .csv with the headers ( )
            """
            relationship_dict = {}
            # custeio_dict['id_custeio'] + "-" + custeio_dict['nome'] yields
            # 'id_custeio-nome_custeio', the key under which the value of the
            # custeio related to this activity can be recovered. If there is no
            # value for a given t_atividade/custeio pair, that pair must not
            # appear in the table. Remember: the key for each custeio in the
            # t_atividade sheet is formed by id_custeio + nome_custeio.
            if atividade_dict[custeio_dict['id_custeio'] + "-" + custeio_dict['nome']] != "":
                relationship_dict['id_atividade'] = atividade_dict['id_atividade']
                relationship_dict['id_custeio'] = custeio_dict['id_custeio']
                relationship_dict['valor'] = atividade_dict[custeio_dict['id_custeio'] + "-" + custeio_dict['nome']]
                relationship_dict_list.append(relationship_dict)
                # print(relationship_dict)
            # DEBUG
            # print("ID Atividade: " + atividade_dict['id_atividade'] + " -> " +
            #       "ID Custeio: " + custeio_dict['id_custeio'] + " " +
            #       custeio_dict['nome'] + ": " +
            #       atividade_dict[custeio_dict['id_custeio'] + "-" + custeio_dict['nome']])
            # print("---------------------------------------------------------------------")
    # for dicio in old_dict_list:
    #     print("A atividade ID " + dicio['id_atividade'] +
    #           " '" + dicio['nome'] + "' esta relacionada ao custeio DE VALOR:" + dicio['11-passagens'])
    return relationship_dict_list
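A minimal usage sketch for the function above, assuming readcsv returns one dictionary per CSV row keyed by header (the atividade.csv path matches the other snippets in this project; the values shown in the comment are hypothetical):

atividade_rows = readcsv("csv_test/atividade.csv")
relationship_rows = create_relationship(atividade_rows)
for row in relationship_rows:
    # each row carries both IDs plus the related value, e.g.
    # {'id_atividade': '1', 'id_custeio': '11', 'valor': '1500.00'}  (hypothetical values)
    print(row)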
def test_readcsv():
    from readcsv import readcsv
    raw_data = readcsv('test_readcsv.csv')
    assert raw_data == {'time': [1, 7, 1.2], 'voltage': [2, 9, 4.5]}
    assert raw_data != {
        'time': [1, '', 7, 'c', 1.2],
        'voltage': [2, 6, 9, 10, 4.5]
    }
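The fixture file test_readcsv.csv is not shown here. Assuming this readcsv maps each header to that column's parsed values, a layout like the following would satisfy the first assertion (the file content below is an assumption, not taken from the project):

time,voltage
1,2
7,9
1.2,4.5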
def allcountries():
    """
    Reduces the list of countries by deleting the duplicates with set() and
    sorts them in ascending order with sorted(). Each country in the file
    ends up in the returned list.
    """
    logging.debug("Using function allcountries()")
    df = readcsv('Region')
    countries = sorted(set(df['Region'].to_list()))
    logging.debug(countries)
    return countries
def allyears():
    """
    Reduces the list of years by deleting the duplicates with set() and
    sorts them in ascending order with sorted(). Each year in the file ends
    up in the returned list.
    """
    logging.debug("Using function allyears()")
    df = readcsv('Year', 'Region')
    years = sorted(set(df['Year'].to_list()))
    logging.debug(years)
    return years
def showphoto(path, starttime, phototimefile=r'D:\mitacs\image\photo\phototime.csv'):
    photodata = readcsv(phototimefile)
    phototime = []
    for i in range(len(photodata)):
        phototime.append(
            datetime.datetime.strptime(photodata[i][1], '%Y-%m-%d %H:%M:%S'))
        print(phototime[i])
        if phototime[i] > starttime:
            # open whichever of the two surrounding photos is closer to starttime
            if (phototime[i] - starttime) > (starttime - phototime[i - 1]):
                img = Image.open(photodata[i - 1][0])
                print(photodata[i - 1][0])
            else:
                img = Image.open(photodata[i][0])
                print(photodata[i][0])
            img.show()
            break  # without this, every later photo would be opened as well
def readctg(ctgcsv, excludeitself=False):  # by default a unit is included in its own list
    r = readcsv.readcsv(ctgcsv, 'int')
    ctg = r.getData()
    id = np.unique(ctg[:, 0])
    ctgdic = {}
    for item in id:
        ctgdic[item] = []
    if excludeitself:
        for item in ctg:
            if item[0] != item[1]:  # '<>' is Python 2-only syntax; '!=' works everywhere
                ctgdic[item[0]].append(item[1])
    else:
        for item in ctg:
            ctgdic[item[0]].append(item[1])
    return ctgdic
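A small worked illustration of the mapping readctg produces, assuming getData() yields an N×2 integer array of (id, neighbour) pairs (the input rows below are hypothetical):

# Hypothetical contiguity rows: (1,1), (1,2), (2,1), (2,2)
# readctg(path)                      -> {1: [1, 2], 2: [1, 2]}
# readctg(path, excludeitself=True)  -> {1: [2], 2: [1]}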
def byyear(year):
    """
    Depending on the chosen year, retrieves the average total CO2 emissions
    (in thousands of metric tons) worldwide.
    """
    logging.debug(f"Using function byyear({year})")
    globalemission = ["Emissions (thousand metric tons of carbon dioxide)"]
    df = readcsv('Year', 'Value', 'Emission', 'Region')
    df = df.loc[df['Year'].isin([str(year)])]
    df = df.loc[df['Emission'].isin(globalemission)]
    res = {}
    res["Year"] = year
    res["Total"] = round(df['Value'].mean(), 3)
    logging.debug(res)
    return res
def bycountry(country):
    """
    Depending on the chosen country, retrieves the most recent entry for
    total CO2 emissions in thousands of metric tons.
    """
    logging.debug(f"Using function bycountry({country.title()})")
    df = readcsv('Region', 'Year', 'Value')
    df = df.loc[df['Region'].isin([country])]
    df = df.sort_values(by='Year', ascending=False)
    res = {}
    res["Country"] = str(df.iloc[0][0]).title()
    res["Year"] = int(df.iloc[0][1])
    res["Emissions"] = float(df.iloc[0][2])
    logging.debug(res)
    return res
def bypercapita(country):
    """
    Depending on the chosen country, displays CO2 emissions (in metric tons
    per capita) across the different survey years.
    """
    logging.debug(f"Using function bypercapita({country.title()})")
    capita = ["Emissions per capita (metric tons of carbon dioxide)"]
    df = readcsv('Region', 'Year', 'Emission', 'Value')
    df = df.loc[df['Region'].isin([country])]
    df = df.loc[df['Emission'].isin(capita)]
    res = {}
    nbannee = len(allyears())
    for i in range(nbannee):
        res[int(df.iloc[i][1])] = float(df.iloc[i][3])
    logging.debug(res)
    return res
def test_readcsv(capsys):
    dirpath = Path("C:/code/cohort4/python-IO")
    filename = "Census_by_Community_2019.csv"
    hdr = "************************************************************\n"
    hdr += "************** Calgary Public Data Summary ***************\n"
    hdr += "************************************************************\n"
    content1 = "Residential and SOUTH: 230129"
    content2 = "Industrial and NORTH: 0"
    content3 = "Overall Total: 1283177"
    content4 = "Total number of records: 307"
    ftr = "************************************************************\n"
    ftr += "*************** End of Report Summary ********************\n"
    ftr += "************************************************************\n"

    result = readcsv(dirpath, filename)
    captured = capsys.readouterr()
    reportdata = readfile(dirpath, "report.txt")

    assert result == {
        "keyvaluepair": {"RES_CNT": 10, "CLASS": 1, "SECTOR": 5},
        "linenum": 308
    }
    assert hdr in captured.out
    assert ftr in captured.out
    assert content1 in captured.out
    assert content2 in captured.out
    assert content3 in captured.out
    assert content4 in captured.out
    assert os.path.isfile(os.path.join(dirpath, "report.txt"))
    assert content1 in reportdata
    assert content2 in reportdata
    assert content3 in reportdata
    assert content4 in reportdata
def drowFathers(slicelabel):
    label_data = readcsv(r'D:\mitacs\audio classification\label.csv')
    branch = []
    for z in range(1, len(slicelabel), 2):
        drowFather2child(label_data, slicelabel[z], branch)
    print(branch)
    # Remove duplicated (parent, child) pairs; branch stores them flat, two items at a time.
    x = 0
    while x < len(branch):
        y = x + 2
        while y < len(branch):
            if branch[x] == branch[y] and branch[x + 1] == branch[y + 1]:
                branch.pop(y + 1)
                branch.pop(y)
                # do not advance y: the next pair has shifted into position y
            else:
                y = y + 2
        x = x + 2
    print(branch)
    branch2 = list(map(lambda x: x.replace(' ', '\n'), branch))
    print(branch2)
    slicelabel = list(map(lambda x: x.replace(' ', '\n'), slicelabel))
    g = Digraph('测试图片', format="png")  # graph name: "test image"
    for x in range(1, len(slicelabel), 2):
        print(slicelabel[x])
        color = round(8 * float(slicelabel[x + 1]), 3)
        print(color)
        g.node(slicelabel[x], color='0.000 %f %f' % (color, color),
               shape='circle', width='2', height='2', penwidth='%f' % color)
    for i in range(0, len(branch) - 1, 2):
        g.node(branch2[i], shape='circle', width='2', height='2')
        g.node(branch2[i + 1], shape='circle', width='2', height='2')
        g.edge(branch2[i + 1], branch2[i])
        # i = i + 2
    #print(g.source)
    g.view()
    The calculated values are compiled into a dictionary called metrics and
    written to a JSON file with the same name as the input, in the folder
    output_data. Once the metrics are written to the JSON file, a new
    message is entered into the log.
    """
    logging.basicConfig(filename="Main_Log.txt",
                        format='%(asctime)s %(message)s',
                        datefmt='%m/%d/%Y %I:%M:%S %p')
    Successful_Read = False
    while Successful_Read is False:
        filename = input("Enter filename: ")
        try:
            raw_data = readcsv('test_data/' + filename + '.csv')
        except FileNotFoundError:
            print("File not found, enter new filename")
            continue
        try:
            validate_data(raw_data)
        except DiffListLengthError:
            print("File contains lists of different lengths, "
                  "enter new filename")
            continue
        except NegTimeValueError:
            print("File contains negative time values, "
                  "enter new filename")
            continue
        logging.warning("Info: " + filename + ".csv was read and validated")
        Successful_Read = True
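The JSON-writing step described in the docstring is not part of this excerpt. A minimal sketch of it, assuming a metrics dict has already been computed; the helper name write_metrics and its signature are assumptions, not the project's code:

# Hypothetical sketch of the JSON step described above; the real project may differ.
import json
import os

def write_metrics(metrics, filename, outdir='output_data'):
    # writes metrics to output_data/<filename>.json, mirroring the input name
    os.makedirs(outdir, exist_ok=True)
    with open(os.path.join(outdir, filename + '.json'), 'w') as f:
        json.dump(metrics, f)
    logging.warning("Info: " + filename + ".json was written")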
print data
datadict = {}
for item in data:
    datadict[item[0]] = int(item[1])
print datadict
for key in sorted(datadict):
    #print "%s: %s" % (key, datadict[key])
    print key, '\t', datadict[key]
"""

tractcsv = "C:/_DATA/CancerData/NewEnglandDiseaseData/tract/tractinfo_intonly.csv"
countycsv = "C:/_DATA/CancerData/NewEnglandDiseaseData/tract/countyinfo_intonly.csv"
r = readcsv.readcsv(tractcsv)
tractdata = r.getData()
r = readcsv.readcsv(countycsv)
countydata = r.getData()

countydict = {}
scale = []
for i in xrange(len(countydata)):
    countydict[int(countydata[i, 4])] = i
    scale.append(1.0 * countydata[i, 5] / countydata[i, 6])
print countydict

# tractpop = np.zeros(len(countydata))
for i in xrange(len(tractdata)):
    # print tractdata
import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))
import numpy as np
import readcsv
from scipy import stats

#--------------------------------------------------------------------------
#MAIN
if __name__ == "__main__":
    filePath = 'C:/_DATA/CancerData/SatScan/mult6000/three16_modify/'
    sigmacsv = filePath + 'sigma_summary.csv'
    r = readcsv.readcsv(sigmacsv)
    data = r.getData()
    diff = data[:, 0] - data[:, 1]
    print 't-statistic = %6.3f, pvalue = %6.4f' % stats.ttest_1samp(diff, 0.0)
import csv
from readcsv import readcsv

CSV_FILE_PATH = "csv_test/atividade.csv"
ATIVIDADE_DICT_LIST = readcsv(CSV_FILE_PATH)  # returns the list of dictionaries generated from atividade.csv


def show_dicts(dict_list, showkeys=False):
    """
    Helper that prints the current state of the dictionary list passed as a
    parameter. Its only purpose is to make debugging easier.
    :param dict_list:
    :return:
    """
    for dicio in dict_list:
        print(dicio)
    if showkeys:
        print("-------------------------------------------------")
        print("KEYS LIST")
        print("[", end='')
        for key in dict_list[0].keys():
            print(key, end=' ')
        print("]")
        print("-------------------------------------------------")


def create_related_csv(filedirectory, dict_list):  # ['id_custeio', 'nome']
# -*- coding: UTF-8 -*-
import urllib2
from datetime import datetime
import sys
from readcsv import readcsv
from bs4 import BeautifulSoup

reload(sys)
sys.setdefaultencoding('UTF-8')

stock_id_dict = readcsv()


def examine_ipodate(stocks_array):
    # stocks_array = stocks.split("\n")
    stocks_to_remove_array = []
    print stocks_array
    for x in stocks_array:
        if x.startswith("00"):  # delete ETFs
            print "ETF: " + x
            stocks_to_remove_array.append(x)  # removing while iterating would skip elements
        if int(x) >= 10000:  # delete convertible corporate bonds (可轉換公司債)
            stocks_to_remove_array.append(str(x))
    for x in stocks_to_remove_array:
        stocks_array.remove(x)
    stock_examined_id = []
    stock_examined_shortname = []
    stock_failed_id = []
    stock_failed_shortname = []
    #plotshapefile.plotshapefile('C:/_DATA/CancerData/SatScan/NorthEasternUS.shp', plotvalue, 'Set3', 'maxllr_data')
    #debug---------------/
    return maxllr

#--------------------------------------------------------------------------
#MAIN
if __name__ == "__main__":
    start_time = datetime.now()
    print "begin at " + str(start_time)
    filePath = 'C:/_DATA/CancerData/NewEnglandDiseaseData/'
    randomdatacsv = filePath + 'format/Random.csv'
    r = readcsv.readcsv(randomdatacsv, 'int')
    randomdata = r.getData()
    pop = randomdata[:, 1]
    popsum = sum(pop)
    repeatTime = len(randomdata[0, :]) - 2
    repeatTime = 1  # override: run a single repetition
    numUnit = len(pop)
    maxLLRlist = []
    llr = likelihoodratio.likelihoodratio(sum(randomdata[:, 2]), popsum)
    printstep = 100
    for repeat in xrange(repeatTime):
        if repeat % printstep == 0:
            print repeat, str(datetime.now())
        repeat = 2  # override: force a fixed repetition index
    tap_nb = deepcopy(traintable)
    for x in tap_nb:
        #if hamkhoangcach(x, z, w, colnum - 1) < d:  # pass in the colnum already determined
        d = hamkhoangcach(x, z, w, colnum - 1)
        x.append(d)
        print("element x in the neighbour set:", x)
        print("distance:", d)
        #tap_nb.append(y)
    tap_nb.sort(key=lambda y: y[-1])  # y[-1] takes the last element (the distance)
    tap_nbk = tap_nb[0:k]  # take the first k elements
    return tap_nbk


if __name__ == '__main__':
    w = [1, 1, 1, 1, 1, 1, 1, 1]
    train = readcsv('prostate-training-data.csv', 1)
    print("train file: ", train)
    test = readcsv('prostate-test-Vi-Du.csv', 0)
    #print("rows of test:", len(test[2]))
    print("test file:", test)
    colnum = train[1]  # find the max for normalisation
    print("colnum:", colnum)
    print("max of train: ", maxcol(colnum, train[2]))
    print("max of test: ", maxcol(colnum, test[2]))
    maxchuanhoa = maxall(colnum, test[2], train[2])
    print("normalisation max:", maxchuanhoa)
    if stopflag:
        return templist
    else:
        return checkContiguous(ctg, newRegionDict, regiondict)

# --------------------------------------------------------------------------
# MAIN
if __name__ == "__main__":
    start_time = datetime.now()
    print "begin at " + str(start_time)
    filePath = "C:/_DATA/CancerData/NewEnglandDiseaseData/"
    datacsv = "C:/_DATA/CancerData/NewEnglandDiseaseData/format/simundernull9999.csv"
    r = readcsv.readcsv(datacsv)
    data = r.getData()
    pop = data[:, 1]
    ctgcsv = "C:/_DATA/CancerData/SatScan/NortheeaternUS_re_contiguity.ctg"
    ctgdic = readctg(ctgcsv)

    ## read image via matplotlib
    # im = mpimg.imread(imagefile)
    # num_row, num_col = im.shape
    # num_row, num_col = int(num_row), int(num_col)
    # im.shape = (-1,1)
    # imagedata = im[:,0].tolist()

    ## global variables
    alpha = 1
    beta = 0.1
import config
import readcsv
import datacleaning
import featurescaling
import algorithm
import predict
import writecsv
import numpy

data = readcsv.readcsv(config.trainingset)
config.data = data
config.row_ = data.shape[0]
config.col_ = data.shape[1]
datacleaning.cabin(config.data, 10)   # Problem specific data cleaning
datacleaning.gender(config.data, 3)   # Problem specific data cleaning
feature = config.allowed_feature
config.cdata = datacleaning.reduce_features(config.row_, len(feature), feature, config.data)
config.cdata = datacleaning.polynomial_feature_cubic(config.cdata)
crow_ = config.cdata.shape[0]
ccol_ = config.cdata.shape[1]
featurescaling.featurescaling(config.cdata)
feature = config.outputlabel
y = datacleaning.get_output_label(config.row_, feature, config.data)
config.y = y
theta = numpy.zeros((ccol_, 1))
print "training....."
parser.add_argument('--csvcolumnseparator', default=',')
parser.add_argument('--csvemptycell', default='')
parser.add_argument('--csvcolorformat', default='html')
parser.add_argument('--nullcolor', default=__NULL_COLOR)
parser.add_argument('--topdepth', '-t', type=float, default=np.nan)
parser.add_argument('--bottomdepth', '-b', type=float, default=np.nan)
parser.add_argument('--topshift', type=float, default=0.0)
parser.add_argument('--bottomshift', type=float, default=0.0)
parser.add_argument('--width', type=int)
parser.add_argument('--height', type=int)
parser.add_argument('--outputfilename', '-o')

args = parser.parse_args()

layersheader, layersdata = readcsv.readcsv(args.layersfilename,
                                           delimiter=args.csvcolumnseparator,
                                           headerlines=args.layersskiplines,
                                           onlystr=True)
layercodes = layersdata[args.layerscodecolumn - 1]
tops = layersdata[args.layerstopcolumn - 1]
bottoms = layersdata[args.layersbottomcolumn - 1]

if tops[0] == args.csvemptycell:
    tops[0] = np.nan
if bottoms[-1] == args.csvemptycell:
    bottoms[-1] = np.nan

tops = np.array(list(map(float, tops)))
bottoms = np.array(list(map(float, bottoms)))

# 'is not np.nan' relies on object identity; np.isnan is the reliable test
if not np.isnan(args.topdepth) and not np.isnan(args.bottomdepth):
    n = args.height
Duczmal_d = [95,99,109,110,113,117,129,142,148,150,159,160,164,180,182]
Duczmal_e = [84,95,99,102,109,110,113,116,117,129,142,148,150,159,160,164,180,182,232,233,236]
#Duczmal_f = []
Duczmal_j = [61,76,78,89,94,98,106,111,119,121,123,131,136,151,154,157,158,160,161,162,163,164,165,167,168,169,
             170,171,172,173,174,175,178,179,180,181,182,183,184,185,227,228,230,231,232,233,234,235,236,237,238,239,240,241,242]
Duczmal_k = Duczmal_j + [84,88,90,91,92,93,95,96,99,102,114,118,135,139,140,144,145,146,147,148,150,153,156,159]
Duczmal_k.remove(169)

clustername = ['6mixed16','6rural16','6urban16','Duczmal_a','Duczmal_b',
               'Duczmal_c','Duczmal_d','Duczmal_e','Duczmal_j','Duczmal_k']
               #'Duczmal_f','Duczmal_g','Duczmal_h','Duczmal_i','Duczmal_j'
clusterIDlist = [mixed, rural, urban, Duczmal_a, Duczmal_b, Duczmal_c,
                 Duczmal_d, Duczmal_e, Duczmal_j, Duczmal_k]

tractcsv = 'C:/_DATA/CancerData/NewEnglandDiseaseData/tract/tractinfo_intonly.csv'
r = readcsv.readcsv(tractcsv)
data = r.getData()

for clusteridx in xrange(len(clustername)):
    clustertype = clustername[clusteridx]
    cluster = clusterIDlist[clusteridx]
    cluster_tract = []
    for i in xrange(len(data[:, -2])):
        if int(data[i, -2]) in cluster:
            cluster_tract.append(i)
    print clustertype, cluster_tract

print "end at " + str(finish_time)
print 'Time for whole procedure: ' + str(finish_time - start_time)
print datadict
for key in sorted(datadict):
    #print "%s: %s" % (key, datadict[key])
    print key, '\t', datadict[key]
'''

def weightedchoice(cumsumhere, totalsum):
    y = random.randint(totalsum)

filepath = 'C:/_DATA/CancerData/NewEnglandDiseaseData/'
tractcsv = filepath + 'tract/tractinfo_intonly.csv'
countycsv = filepath + 'tract/countyinfo_intonly.csv'
r = readcsv.readcsv(tractcsv)
tractdata = r.getData()
r = readcsv.readcsv(countycsv)
countydata = r.getData()

#countydict = {}
countytractid = []
countytractpop = []
for i in xrange(len(countydata)):
    #countydict[int(countydata[i,4])] = countydata[i,0]
    countytractid.append([])
    countytractpop.append([])
for i in xrange(len(tractdata)):
    #id = countydict[int(tractdata[i,6])]
Duczmal_d = [95,99,109,110,113,117,129,142,148,150,159,160,164,180,182]
Duczmal_e = [84,95,99,102,109,110,113,116,117,129,142,148,150,159,160,164,180,182,232,233,236]
#Duczmal_f = []
Duczmal_j = [61,76,78,89,94,98,106,111,119,121,123,131,136,151,154,157,158,160,161,162,163,164,165,167,168,169,
             170,171,172,173,174,175,178,179,180,181,182,183,184,185,227,228,230,231,232,233,234,235,236,237,238,239,240,241,242]
Duczmal_k = Duczmal_j + [84,88,90,91,92,93,95,96,99,102,114,118,135,139,140,144,145,146,147,148,150,153,156,159]
Duczmal_k.remove(169)

clustername = ['6mixed16','6rural16','6urban16','Duczmal_a','Duczmal_b',
               'Duczmal_c','Duczmal_d','Duczmal_e','Duczmal_j','Duczmal_k']
               #'Duczmal_f','Duczmal_g','Duczmal_h','Duczmal_i','Duczmal_j'
clusterIDlist = [mixed, rural, urban, Duczmal_a, Duczmal_b, Duczmal_c,
                 Duczmal_d, Duczmal_e, Duczmal_j, Duczmal_k]

popcsv = 'C:/_DATA/CancerData/NewEnglandDiseaseData/format/pop.csv'
r = readcsv.readcsv(popcsv)
pop = r.getData()
pop = pop[:, 1]
sumpop = sum(pop)

power = []   # the p-value of one unit in the true cluster is less than 0.05
power2 = []  # as long as the p-value of one unit is less than 0.05 (whether or not it belongs to the true cluster)
ppv = []
sensitivity = []
misclassification = []

for clusteridx in xrange(len(clustername)):
#for clusteridx in xrange(1):
    #clusteridx = 3
    clustertype = clustername[clusteridx]
    cluster = clusterIDlist[clusteridx]
            self.power0[pid] = 1
            self.power1[pid] += 1
        return self.power0, self.power1

#--------------------------------------------------------------------------
#MAIN
if __name__ == "__main__":
    # modified
    hot_16 = [9, 10, 12, 13, 14, 17, 20, 23, 26, 27, 28, 33, 34, 35, 38, 41,
              95, 99, 109, 110, 114, 115, 117, 119, 125, 126, 127, 128,
              130, 131, 133, 134, 136, 137, 138, 139, 140, 141, 142, 143,
              146, 147, 149, 151, 152, 156, 157, 237]
    rural = set([9,10,12,13,14,17,20,23,26,27,28,33,34,35,38,41])
    mixed = set([95,99,109,110,114,115,117,119,126,131,139,140,142,146,147,237])
    urban = set([125,127,128,130,133,134,136,137,138,141,143,149,151,152,156,157])
    hot_16_set = [rural, mixed, urban]
    #print hot_16
    filePath = 'C:/_DATA/CancerData/SatScan/mult6000/three16_modify/LLR/'
    pvaluecsv = filePath + 'LLR_EBS_high_0_pvalue.csv'
    r = readcsv.readcsv(pvaluecsv)
    #print r
    data = r.getData()
    pvalue = data[:, -1]
    s = SSS_Power(range(len(pvalue)), hot_16_set)
    print s.getMeasure(pvalue)
    #print 'sigma =', s.getMeasure(pvalue)
import json
import simplejson
import sys, os
sys.path.append(os.path.join(os.path.dirname(sys.argv[0]), '..', 'common'))
import readcsv


def putjson(data, filename):
    jsondata = simplejson.dumps(data, separators=(',', ':'))
    fd = open(filename, 'w')
    fd.write(jsondata)
    fd.close()


csvfile = 'C:/Users/Hu/Downloads/temp/cnty_pts_lnglat.csv'
r = readcsv.readcsv(csvfile, 'float')
cntypts = r.getData()

cntypts_object = []
for f in cntypts:
    t = {}
    t['id'] = int(f[0])
    t['lng'] = f[1]
    t['lat'] = f[2]
    cntypts_object.append(t)

putjson(cntypts_object, 'C:/_DATA/migration_census_2000/cnty/cnty_pts_lnglat.json')
#--------------------------------------------------------------------------
#MAIN
if __name__ == "__main__":
    start_time = datetime.now()
    print "begin at " + str(start_time)

    #filePath = 'C:/_DATA/CancerData/SatScan/mult6000/three16_modify/LLR_buildonly/'
    filePath = 'C:/_DATA/CancerData/SatScan/mult6000/three16/LLR_buildonly/'
    datacsv = filePath + 'minpvalue_Unsmoothed_corrected.csv'
    #datacsv = filePath + 'minpvalue_corrected.csv'
    r = readcsv.readcsv(datacsv, 'float')
    data = r.getData()
    data = data * 10
    repeatTime = len(data[0, :])
    header = ''
    fileLoc = datacsv[:-4] + '_2.csv'
    for i in xrange(repeatTime):
        header += 'data' + str(i) + ','
    np.savetxt(fileLoc, data, delimiter=',', comments='', header=header[:-1], fmt='%s')

    finish_time = datetime.now()
    print "end at " + str(finish_time)
    print 'Time for whole procedure: ' + str(finish_time - start_time)
    print "========================================================================"
import simplejson
import sys, os
sys.path.append(os.path.join(os.path.dirname(sys.argv[0]), '..', 'common'))
import readcsv
import numpy as np


def putjson(data, filename):
    jsondata = simplejson.dumps(data, separators=(',', ':'))
    fd = open(filename, 'w')
    fd.write(jsondata)
    fd.close()


#csvfile = 'C:/_DATA/migration_census_2000/cnty/sig_flowtable_above65_allm.csv'
#csvfile = 'C:/_DATA/migration_census_2000/cnty/sig_flowtable_allage_allm.csv'
csvfile = 'C:/_DATA/migration_census_2009/cnty/sig_flowtable_allage_1m.csv'
r = readcsv.readcsv(csvfile, 'int')
flowdata = r.getData()  # oid, did, volume

#csvfile = 'C:/_DATA/migration_census_2000/cnty/cnty_pts_lnglat.csv'
#txtfile = 'C:/_DATA/migration_census_2000/cnty/cnty_pts_lnglat.txt'
txtfile = 'C:/_DATA/migration_census_2009/cnty/cnty_pts_lnglat.txt'
#r = readcsv.readcsv(csvfile, 'float')
#cntypnts = r.getData()  # id, x, y
cntypnts = [line.strip() for line in open(txtfile)]

output = '{"type":"FeatureCollection","features":[\n'
i = 0
flowdata = flowdata[np.argsort(flowdata[:, 2]), :]
for f in flowdata:
    output += '{"type":"Feature","id":"' + str(i) + '","properties":{"volume":' + str(f[2])
    output += '},"geometry":{"type":"LineString","coordinates":[['
    print '75Q:', temp_3Q
    return [temp_mean, temp_1Q, temp_median, temp_3Q]

#--------------------------------------------------------------------------
#MAIN
if __name__ == "__main__":
    start_time = datetime.now()
    print "begin at " + str(start_time)
    filePath = 'C:/_DATA/CancerData/SatScan/mult6000/'

    # columns: satscan_rate satscan_rate_error satscan_elliptic_rate satscan_elliptic_rate_error LLR_rate LLR_rate_error
    originaldatacsv = filePath + 'TPFP_original_summary.csv'
    r = readcsv.readcsv(originaldatacsv)
    originaldata = r.getData()

    # columns: satscan_rate satscan_rate_error satscan_elliptic_rate satscan_elliptic_rate_error LLR_rate LLR_rate_error
    modifieddatacsv = filePath + 'TPFP_modified_summary.csv'
    r = readcsv.readcsv(modifieddatacsv)
    modifieddata = r.getData()

    interval = np.arange(0, 1, 0.001)
    #np.savetxt(fileLoc, zip(np.mean(intervalvalue,0), np.std(intervalvalue,0)), delimiter=',', comments='', header='rate,error', fmt='%s')
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax1 = fig.add_subplot(121)
    ax2 = fig.add_subplot(122)
def column(col, table):
    #for row in range(0, rownum):
    #print(len(table))
    for row in range(0, len(table)):
        #print(row)
        yield math.fabs(table[row][col])


def maxcolumn(colnum, table):
    for col in range(0, colnum - 1):
        #print(col)
        yield max([x for x in column(col, table)])


def maxcol(colnum, table):
    return [x for x in maxcolumn(colnum, table)]


def maxall(colnum, test, train):
    testmax = maxcol(colnum, test)
    trainmax = maxcol(colnum, train)
    _maxall = []
    for col in range(0, colnum - 1):
        _maxall.append(max(testmax[col], trainmax[col]))
    return _maxall


if __name__ == '__main__':
    rownum, colnum, table = readcsv(file='prostate-training-data.csv')
    #print(x for x in column(col, table))
    print(maxcol(colnum, table))  # maxcol takes (colnum, table); passing rownum as well was a bug
    def test_readcsv(self):
        # check that the output of readcsv is a class (i.e. its type is a type)
        self.assertIsInstance(type(readcsv('Region')), type)
        # check that the output is not None
        self.assertIsNotNone(readcsv('Region'))
urban = [105,107,112,120,122,125,127,128,130,133,134,141,143,149,152,155]
hot_16 = mixed + rural + urban
'''

## modified risk area
hot_16 = [9, 10, 12, 13, 14, 17, 20, 23, 26, 27, 28, 33, 34, 35, 38, 41,
          95, 99, 109, 110, 114, 115, 117, 119, 125, 126, 127, 128,
          130, 131, 133, 134, 136, 137, 138, 139, 140, 141, 142, 143,
          146, 147, 149, 151, 152, 156, 157, 237]

#filePath = 'C:/_DATA/CancerData/SatScan/mult6000/three16/'
filePath = 'C:/_DATA/CancerData/SatScan/mult6000/three16_modify/'
datacsv = 'C:/_DATA/CancerData/SatScan/mult6000/three16_modify/three16_format_modify.csv'
#datacsv = 'C:/_DATA/CancerData/SatScan/mult6000/three16_format.csv'
r = readcsv.readcsv(datacsv)
data = r.getData()
pop = data[:, 1]

datacsv = 'C:/_DATA/CancerData/SatScan/mult6000/three16_modify/three16_lebs_rate_newCTG.csv'
r = readcsv.readcsv(datacsv)
smoothdata = r.getData()

ctgcsv = 'C:/_DATA/CancerData/SatScan/NortheeaternUS_re_contiguity.ctg'
ctgdic = readctg(ctgcsv)

## read image via matplotlib
#im = mpimg.imread(imagefile)
#num_row, num_col = im.shape
#num_row, num_col = int(num_row), int(num_col)
#im.shape = (-1,1)
#imagedata = im[:,0].tolist()
Duczmal_k.remove(169)

clusteridx = 9
clusterlist = ['6mixed16','6rural16','6urban16','Duczmal_a','Duczmal_b',
               'Duczmal_c','Duczmal_d','Duczmal_e','Duczmal_j','Duczmal_k']
               #'Duczmal_f','Duczmal_g','Duczmal_h','Duczmal_i','Duczmal_j'
clusterIDlist = [mixed, rural, urban, Duczmal_a, Duczmal_b, Duczmal_c,
                 Duczmal_d, Duczmal_e, Duczmal_j, Duczmal_k]
clustertype = clusterlist[clusteridx]
cluster = clusterIDlist[clusteridx]
print 'Cluster type: ', clustertype

#
filePath = 'C:/_DATA/CancerData/NewEnglandDiseaseData/'
datacsv = filePath + 'format/' + clustertype + '.csv'
r = readcsv.readcsv(datacsv, 'int')
#print randomdatacsv
data = r.getData()
pop = data[:, 1]

randomLLRcsv = filePath + 'LLR_buildonly/Random/maxllr.csv'
r = readcsv.readcsv(randomLLRcsv)
randomLLR = r.getData()
randomLLR = randomLLR[:, 0]
randomCount = len(randomLLR) + 1

repeatTime = len(data[0, :]) - 2
#repeatTime = 100
numUnit = len(pop)
maxPvaluelist = []
Duczmal_d = [95,99,109,110,113,117,129,142,148,150,159,160,164,180,182]
Duczmal_e = [84,95,99,102,109,110,113,116,117,129,142,148,150,159,160,164,180,182,232,233,236]
#Duczmal_f = []
Duczmal_j = [61,76,78,89,94,98,106,111,119,121,123,131,136,151,154,157,158,160,161,162,163,164,165,167,168,169,
             170,171,172,173,174,175,178,179,180,181,182,183,184,185,227,228,230,231,232,233,234,235,236,237,238,239,240,241,242]
Duczmal_k = Duczmal_j + [84,88,90,91,92,93,95,96,99,102,114,118,135,139,140,144,145,146,147,148,150,153,156,159]
Duczmal_k.remove(169)

clustername = ['6mixed16','6rural16','6urban16','Duczmal_a','Duczmal_b',
               'Duczmal_c','Duczmal_d','Duczmal_e','Duczmal_j','Duczmal_k']
               #'Duczmal_f','Duczmal_g','Duczmal_h','Duczmal_i','Duczmal_j'
clusterIDlist = [mixed, rural, urban, Duczmal_a, Duczmal_b, Duczmal_c,
                 Duczmal_d, Duczmal_e, Duczmal_j, Duczmal_k]

popcsv = 'C:/_DATA/CancerData/NewEnglandDiseaseData/format/pop.csv'
r = readcsv.readcsv(popcsv)
pop = r.getData()
pop = pop[:, 1]
sumpop = sum(pop)

power = []   # the p-value of one unit in the true cluster is less than 0.05
power2 = []  # as long as the p-value of one unit is less than 0.05 (whether or not it belongs to the true cluster)
ppv = []
sensitivity = []
misclassification = []

for clusteridx in xrange(len(clustername)):
#for clusteridx in xrange(1):
    #clusteridx = 9
    clustertype = clustername[clusteridx]
    cluster = clusterIDlist[clusteridx]
import readcsv
import datetime
import pandas as pd
import calculate

CSV_NAME = '平衡型基金_NAV_Return.csv'
test_list_of_fund = ['21719942', '42334401A', '42334401B']
test_ratio_of_fund = [0.2, 0.5, 0.3]
CD_NUMBER = 0.6 / 52  # better to derive this from real data than to hard-code it

if __name__ == '__main__':
    # start = datetime.datetime.strptime("2016/5/6", "%Y/%m/%d")
    # end = datetime.datetime.strptime("2019/4/26", "%Y/%m/%d")
    start = datetime.datetime.strptime("2016/5/6", "%Y/%m/%d")
    end = datetime.datetime.strptime("2016/12/6", "%Y/%m/%d")
    df = readcsv.readcsv(CSV_NAME, start, end, CD_NUMBER)
    print("finish df")
    # print(df)
    # cor = calculate.cal_cor(df, "normal")
    # print(cor.head(10))
    # print("===============")
    # cor_down = calculate.cal_cor(df, "downside")
    # print(cor_down)
    # print("finish cor")
    risk = calculate.cal_co_risk(test_list_of_fund, test_ratio_of_fund, df)
    print(risk)
print("--------------------------------------------------------------------------------------------------------------------------------") print("Chave primaria já existe: " + str(ViolacaoPK), end='') print("Comando excluido da lista:\n" + sql_insert) # Retira o comando problematico da lista e reexecuta a função insert_list.remove(sql_insert) execute_insert_list(insert_list) except psycopg2.InternalError as ErroInterno: print("Erro interno: " + str(ErroInterno)) print("--------------------------------------------------------------------------------------------------------------------------------") # Recupera uma lista de dicionários com os dados a serem inseridos no banco... insert_list = construct_insert_list( readcsv(CSV_PATH), TABLE_NAME) # ...e constroi uma lista de inserts com esta lista execute_insert_list(insert_list) """ TIPOS DE DADOS A SEREM TRATADOS - Com aspas 1. string 2. date - Sem aspas 1. Numericos 2. boolean OBS: Dados do CSV que tenham alguma função dentro dos comandos do banco de dados(como aspas) devem ser tratados, por exemplo: - Tratar aspas simple substituindo-as por aspas duplas """
#filePath = 'C:/_DATA/CancerData/SatScan/mult6000/three16/satscan/'
#filePath = 'C:/_DATA/CancerData/SatScan/mult6000/three16/satscan/elliptic/'
filePath = 'C:/_DATA/CancerData/SatScan/mult6000/three16/LLR_buildonly/'
#filePath = 'C:/_DATA/CancerData/SatScan/mult6000/three16/'
#filePath = 'C:/_DATA/CancerData/SatScan/mult6000/redcap/three16/LLR/EBS/constraint/5p_improved/'
#filePath = 'C:/_DATA/CancerData/SatScan/mult6000/three16_modify/'
#filePath = 'C:/_DATA/CancerData/SatScan/mult6000/three16_modify/satscan/'
#filePath = 'C:/_DATA/CancerData/SatScan/mult6000/three16_modify/satscan/elliptic/'
#filePath = 'C:/_DATA/CancerData/SatScan/mult6000/three16_modify/LLR_buildonly/'

#minpvaluecsv = filePath + 'minpvalue_atleast2counties.csv'
#minpvaluecsv = filePath + 'minpvalue2.csv'
minpvaluecsv = filePath + 'minpvalue_corrected_2.csv'
#minpvaluecsv = filePath + 'minpvalue_Unsmoothed_atleast2counties.csv'
#minpvaluecsv = filePath + 'seg_minpvalue_beta0.1.csv'
r = readcsv.readcsv(minpvaluecsv)
minpvalue = r.getData()

repeatTime = 1000
#pvalueLevel = -1  # -1: min pvalue, -2: last level
numUnit = 255
sigma = []
s = measureSigma.SSS_sigma(range(numUnit), hot_16)

row = 0
count = 0
for repeat in xrange(repeatTime):
    row += 1
    if int(row/10) > 0:
        #print count*10
        row = 0
parser.add_argument('--data_dir', help='Path to dataset images')
parser.add_argument('--output_dir', help='Path to directory for saving outputs')
args = parser.parse_args()

train = "train_fluid_intelligence_household.csv"
valid = "valid_fluid_intelligence_household.csv"
test = "test_fluid_intelligence_household.csv"

if not os.path.isdir(args.output_dir):
    try:
        os.mkdir(args.output_dir)
    except OSError:  # a bare 'except:' would also swallow KeyboardInterrupt and SystemExit
        raise Exception('Could not create output directory')

csv_train = readcsv(args.data_dir, train)
csv_valid = readcsv(args.data_dir, valid)
csv_test = readcsv(args.data_dir, test)

for key, value in csv_train.items():
    print(key, value)
for key, value in csv_valid.items():
    print(key, value)
for key, value in csv_test.items():
    print(key, value)

print('saving train dict!')
np.save(os.path.join(args.output_dir, 'csv_train_target_dti.npy'), csv_train)