def fetch_words_by_subjects():
    """Fetch the word list for every subject listed in data/subjects.json.

    Reads the subject ids from the local subjects file, issues one request
    to the words API per subject, and writes each JSON response to its own
    data/subject-<id>.json file.
    """
    subjects = readDataFromFile('data/subjects.json')['Subjects']
    for subject in subjects:
        request_url = "https://minder.vn/api/words/words?id_subject={0}".format(subject['id'])
        with urllib.request.urlopen(request_url) as url:
            # Fixed: the original reused the name `data` for both the local
            # subjects file and each API response; use a distinct name.
            words = json.loads(url.read().decode())
            writeDataToFile('data/subject-{0}.json'.format(subject['id']), words)
def generateHeuristicVantagePoints(options):
    """Generate vantage points that are pairwise far apart in Hamming space.

    The first VP is uniform random; each subsequent candidate is accepted
    only if, for every already-chosen VP, the number of agreeing coordinates
    (dim - hammingDistance) does not exceed 40% of the dimensionality.
    The result is written to vp/vp_<dim>_<nVP>_<card>_<type>.txt.

    Ported from Python 2 (`xrange`) for consistency with the Python 3
    `urllib.request` usage elsewhere in this file.
    """
    dim = options['numberOfDimension']
    numberOfVP = options['numberOfVP']
    cardinality = options['numberOfAlphabet']
    typeOfVP = options['typeOfVP']
    threshold = dim * 0.4  # max allowed number of agreeing coordinates
    vps = []
    for i in range(numberOfVP):
        if i == 0:
            vps.append(generateUniformRandomVP(dim, cardinality))
            continue
        # Rejection sampling: retry until a candidate is far from all
        # previously accepted vantage points.
        while True:
            nvp = generateUniformRandomVP(dim, cardinality)
            ok = True
            for j in range(i):
                dist = utils.hammingDistance(vps[j], nvp)
                if dim - dist > threshold:
                    ok = False
                    break
            if ok:
                vps.append(nvp)
                break
    utils.writeDataToFile(
        'vp/vp_%d_%d_%d_%s.txt' % (dim, numberOfVP, cardinality, typeOfVP), vps)
def generateVantagePointsWithManyAlgorithm(options):
    """Randomly grow a VP set until the pairwise-distance histogram is full.

    Candidates are appended speculatively; once `ok(dist, dim)` no longer
    holds, a candidate is kept only if every distance it introduces
    (via calculateMany) lands in a still-empty histogram bucket, otherwise
    it is rolled back.

    NOTE(review): this function name is redefined several times in this
    file; only the last definition survives at import time.
    Ported from Python 2 (`print` statement, `xrange`).
    """
    numberOfData = options['numberOfData']  # unused; kept for option-schema parity
    dim = options['numberOfDimension']
    numberOfVP = options['numberOfVP']
    cardinality = options['numberOfAlphabet']
    typeOfVP = options['typeOfVP']
    vps = [generateUniformRandomVP(dim, cardinality)]
    dist = [0 for _ in range(dim + 1)]  # histogram of observed distances
    while len(vps) < numberOfVP:
        cur_vp = generateUniformRandomVP(dim, cardinality)
        vps.append(cur_vp)
        if ok(dist, dim):
            # Histogram already satisfied: keep the candidate unchecked.
            continue
        ss = calculateMany(vps)
        print(len(vps), end=' ')  # progress (py3 port of `print x,`)
        # Reject the candidate if any introduced distance hits an
        # already-occupied bucket.
        if any(dist[s] != 0 for s in ss):
            vps = vps[:-1]
            continue
        for s in ss:
            dist[s] += 1
    utils.writeDataToFile(
        'vp/vp_%d_%d_%d_%s.txt' % (dim, numberOfVP, cardinality, typeOfVP), vps)
def generateVantagePointsWithManyAlgorithm(options):
    """Greedy VP selection: repeatedly add the data point that maximizes
    the calculateMany objective over the current VP set.

    Starts from the first data point; each round evaluates every not-yet-
    chosen data point as a trial VP and keeps the best-scoring one.

    NOTE(review): this function name is redefined several times in this
    file; only the last definition survives at import time.
    Ported from Python 2 (`print` statement, `xrange`).
    """
    numberOfData = options['numberOfData']  # unused; kept for option-schema parity
    dim = options['numberOfDimension']
    numberOfVP = options['numberOfVP']
    cardinality = options['numberOfAlphabet']
    typeOfVP = options['typeOfVP']
    datas = utils.getDataInFile(utils.getDataFileName(options))
    vps = [datas[0]]
    while len(vps) < numberOfVP:
        print(len(vps))  # progress
        ans, ansDataIndex = -1, -1
        for i in range(len(datas)):
            # Skip points already chosen as vantage points.
            if any(datas[i] == vp for vp in vps):
                continue
            vps.append(datas[i])  # trial append
            cur = calculateMany(vps)
            if cur > ans:
                ans, ansDataIndex = cur, i
            vps = vps[:-1]  # roll back the trial
        vps.append(datas[ansDataIndex])
    utils.writeDataToFile(
        'vp/vp_%d_%d_%d_%s.txt' % (dim, numberOfVP, cardinality, typeOfVP), vps)
def generateVantagePointsWithPattern(options):
    """Build `major` and `minor` pattern VPs from per-dimension symbol counts.

    For each dimension j, symbols are ranked by frequency in the data set;
    majorPattern[k] takes the k-th most frequent symbol per dimension,
    minorPattern[k] the k-th least frequent.  Both sets are written to
    vp/ files suffixed 'major' and 'minor'.

    Fixed: the first occurrence of a symbol was counted as 0 instead of 1,
    so every stored frequency was one too low.  The ranking itself was
    unaffected (all counts were uniformly off by one), but the counts are
    now correct.  Also ported from Python 2 (`xrange`).
    """
    numberOfData = options['numberOfData']
    dim = options['numberOfDimension']
    numberOfVP = options['numberOfVP']
    cardinality = options['numberOfAlphabet']
    typeOfVP = options['typeOfVP']  # unused; kept for option-schema parity
    datas = utils.getDataInFile(utils.getDataFileName(options))
    majorPattern = [[] for _ in range(numberOfVP)]
    minorPattern = [[] for _ in range(numberOfVP)]
    for j in range(dim):
        counts = {}
        for i in range(numberOfData):
            sym = datas[i][j]
            counts[sym] = counts.get(sym, 0) + 1
        ranked = sorted(counts.items(), key=lambda kv: kv[1], reverse=True)
        for k in range(numberOfVP):
            majorPattern[k].append(ranked[k][0])
            minorPattern[k].append(ranked[-1 - k][0])
    utils.writeDataToFile(
        'vp/vp_%d_%d_%d_%s.txt' % (dim, numberOfVP, cardinality, 'major'), majorPattern)
    utils.writeDataToFile(
        'vp/vp_%d_%d_%d_%s.txt' % (dim, numberOfVP, cardinality, 'minor'), minorPattern)
def generateGreedyVantagePoints(options):
    """Greedily pick vantage points from the corner points of the space.

    Starts from the all-'A' corner; each round appends the corner point
    that minimizes getTotalCostFunction against the VPs chosen so far.
    Relies on getCornerPoints() populating the module-level `cornerPoints`
    list as a side effect.

    Ported from Python 2 (`print` statements, `xrange`); dead commented-out
    seeding code removed.
    """
    alphabet = list(string.ascii_uppercase)
    dim = options['numberOfDimension']
    numberOfVP = options['numberOfVP']
    cardinality = options['numberOfAlphabet']
    # Target pairwise distance for uniform data: dim * (1 - 1/cardinality).
    base = abs(float(dim) - float(dim) / float(cardinality))
    getCornerPoints(0, [], dim, alphabet, cardinality)  # fills `cornerPoints`
    vps = [['A' for _ in range(dim)]]
    for i in range(numberOfVP - 1):
        print(i)  # progress
        mn, mn_idx = 987654321.0, 0  # sentinel "infinity"
        for j in range(len(cornerPoints)):
            variation = getTotalCostFunction(vps, cornerPoints[j], base)
            if mn > variation:
                mn, mn_idx = variation, j
        print(mn, mn_idx, cornerPoints[mn_idx])
        vps.append(cornerPoints[mn_idx])
    utils.writeDataToFile(
        'vp/vp_%d_%d_%d_greedy.txt' % (dim, numberOfVP, cardinality), vps)
def generateVantagePointsWithManyAlgorithm(options):
    """Greedy VP selection over the data set (duplicate definition).

    Each round trials every data point not already chosen, scores the
    enlarged VP set with calculateMany, and keeps the highest-scoring
    point.  Begins from the first data point.

    NOTE(review): this is a byte-for-byte duplicate of an earlier
    definition of the same name in this file; consider deleting one copy.
    Ported from Python 2 (`print` statement, `xrange`).
    """
    numberOfData = options['numberOfData']  # unused; kept for option-schema parity
    dim = options['numberOfDimension']
    numberOfVP = options['numberOfVP']
    cardinality = options['numberOfAlphabet']
    typeOfVP = options['typeOfVP']
    datas = utils.getDataInFile(utils.getDataFileName(options))
    vps = [datas[0]]
    while len(vps) < numberOfVP:
        print(len(vps))  # progress
        best, bestIdx = -1, -1
        for i in range(len(datas)):
            if any(datas[i] == vp for vp in vps):
                continue  # already a vantage point
            vps.append(datas[i])  # trial append
            score = calculateMany(vps)
            if score > best:
                best, bestIdx = score, i
            vps = vps[:-1]  # roll back the trial
        vps.append(datas[bestIdx])
    utils.writeDataToFile(
        'vp/vp_%d_%d_%d_%s.txt' % (dim, numberOfVP, cardinality, typeOfVP), vps)
def generateVantagePoints(options):
    """Write numberOfVP uniform-random vantage points to a vp/ file.

    Ported from Python 2 (`xrange`) and condensed to a comprehension.
    """
    dim = options['numberOfDimension']
    numberOfVP = options['numberOfVP']
    cardinality = options['numberOfAlphabet']
    vps = [generateUniformRandomVP(dim, cardinality) for _ in range(numberOfVP)]
    utils.writeDataToFile(
        'vp/vp_%d_%d_%d_random.txt' % (dim, numberOfVP, cardinality), vps)
def generateRandomVP(options):
    """Sample numberOfVP vantage points uniformly (with replacement) from
    the data file and write them to a vp/ file.

    Fixed: `random.randint(0, numberOfData)` is inclusive on both ends, so
    it could produce index == numberOfData and raise IndexError; use
    `random.randrange(numberOfData)`, whose stop is exclusive.
    Also ported from Python 2 (`xrange`).
    """
    numberOfData = options['numberOfData']
    dim = options['numberOfDimension']
    numberOfVP = options['numberOfVP']
    cardinality = options['numberOfAlphabet']
    typeOfVP = options['typeOfVP']
    datas = utils.getDataInFile(utils.getDataFileName(options))
    vps = [datas[random.randrange(numberOfData)] for _ in range(numberOfVP)]
    utils.writeDataToFile(
        'vp/vp_%d_%d_%d_%s.txt' % (dim, numberOfVP, cardinality, typeOfVP), vps)
def generateVantagePointsWithManyAlgorithm(options):
    """Threshold-based VP selection seeded with the major pattern.

    The seed VP takes the most frequent symbol of every dimension.  Each
    pass over the data adds any point whose distances to all current VPs
    fall only in histogram buckets used at most `threshold` times; the
    threshold is relaxed by one after every full pass, so the loop always
    terminates once enough points qualify.

    NOTE(review): this function name is redefined several times in this
    file.  Fixed the first-occurrence count (was 0, now 1; ranking
    unaffected), removed unused locals (`ans`, `ansDataIndex`, `one_pass`),
    and ported from Python 2 (`print`, `xrange`).
    """
    numberOfData = options['numberOfData']
    dim = options['numberOfDimension']
    numberOfVP = options['numberOfVP']
    cardinality = options['numberOfAlphabet']
    typeOfVP = options['typeOfVP']
    datas = utils.getDataInFile(utils.getDataFileName(options))
    # Seed: per-dimension most frequent symbol.
    seed = []
    for j in range(dim):
        counts = {}
        for i in range(numberOfData):
            sym = datas[i][j]
            counts[sym] = counts.get(sym, 0) + 1
        seed.append(max(counts.items(), key=lambda kv: kv[1])[0])
    vps = [seed]
    d = [0 for _ in range(dim + 1)]  # usage histogram over distances 0..dim
    threshold = 0
    while len(vps) < numberOfVP:
        print(len(vps))  # progress
        for i in range(len(datas)):
            skip = False
            for vp in vps:
                if d[utils.hammingDistance(datas[i], vp)] > threshold:
                    skip = True  # distance bucket already too popular
                if datas[i] == vp:
                    skip = True  # already a vantage point
            if skip:
                continue
            for vp in vps:
                d[utils.hammingDistance(datas[i], vp)] += 1
            vps.append(datas[i])
        threshold += 1  # relax the bucket limit for the next pass
    utils.writeDataToFile(
        'vp/vp_%d_%d_%d_%s.txt' % (dim, numberOfVP, cardinality, typeOfVP), vps)
def generateVantagePointsWithManyAlgorithm(options):
    """Greedy VP selection seeded with the per-dimension major pattern.

    The seed VP takes the most frequent symbol of every dimension; then
    each round adds the data point maximizing calculateMany, as in the
    plain greedy variant.

    NOTE(review): this function name is redefined several times in this
    file.  Fixed the first-occurrence count (was 0, now 1; ranking
    unaffected) and ported from Python 2 (`print`, `xrange`).
    """
    numberOfData = options['numberOfData']
    dim = options['numberOfDimension']
    numberOfVP = options['numberOfVP']
    cardinality = options['numberOfAlphabet']
    typeOfVP = options['typeOfVP']
    datas = utils.getDataInFile(utils.getDataFileName(options))
    # Seed: per-dimension most frequent symbol.
    seed = []
    for j in range(dim):
        counts = {}
        for i in range(numberOfData):
            sym = datas[i][j]
            counts[sym] = counts.get(sym, 0) + 1
        seed.append(max(counts.items(), key=lambda kv: kv[1])[0])
    vps = [seed]
    while len(vps) < numberOfVP:
        print(len(vps))  # progress
        best, bestIdx = -1, -1
        for i in range(len(datas)):
            if any(datas[i] == vp for vp in vps):
                continue  # already a vantage point
            vps.append(datas[i])  # trial append
            score = calculateMany(vps)
            vps = vps[:-1]  # roll back the trial
            if score > best:
                best, bestIdx = score, i
        vps.append(datas[bestIdx])
    utils.writeDataToFile(
        'vp/vp_%d_%d_%d_%s.txt' % (dim, numberOfVP, cardinality, typeOfVP), vps)
def convertNDDSToCDS(options):
    """Map NDDS data and query vectors into CDS space via VP distances.

    Every vector x becomes [hammingDistance(x, vp) for vp in vps]; the
    transformed data and query sets are written to the CDS file names
    supplied by utils.

    Ported from Python 2 (`print` statements, `xrange`); the transform
    loops are expressed as comprehensions.
    """
    size = options['numberOfData']
    dim = options['numberOfDimension']
    distribution = options['distribution']
    cardinality = options['numberOfAlphabet']
    numberOfVP = options['numberOfVP']
    typeOfVP = options['typeOfVP']
    dataFileName = 'data/data_%d_%d_%s_%d.txt' % (size, dim, distribution, cardinality)
    queryFileName = 'query/query_%d_%d_%s_%d.txt' % (size, dim, distribution, cardinality)
    vpFileName = 'vp/vp_%d_%d_%d_%s.txt' % (dim, numberOfVP, cardinality, typeOfVP)
    cdsDataFileName = utils.getCDSDataFileName(options)
    cdsQueryFileName = utils.getCDSQueryFileName(options)
    datas = utils.getDataInFile(dataFileName)
    querys = utils.readDataFromFile(queryFileName)
    vps = utils.readDataFromFile(vpFileName)
    print(len(datas), len(querys), len(vps))
    cdsDatas = [[utils.hammingDistance(x, vp) for vp in vps] for x in datas]
    utils.writeDataToFile(cdsDataFileName, cdsDatas)
    cdsQuerys = [[utils.hammingDistance(q, vp) for vp in vps] for q in querys]
    utils.writeDataToFile(cdsQueryFileName, cdsQuerys)
    print(cdsDataFileName, cdsQueryFileName)
def generateVantagePointsWithPattern(options):
    """Derive 'major' and 'minor' pattern vantage points (duplicate def).

    Per dimension, symbols are ranked by how often they appear in the data;
    the k-th major VP collects the k-th most frequent symbol of each
    dimension, the k-th minor VP the k-th least frequent.

    NOTE(review): duplicates another definition of the same name in this
    file.  Fixed: first occurrence of a symbol was counted as 0 instead
    of 1 (ranking unaffected, stored counts now correct).  Ported from
    Python 2 (`xrange`).
    """
    numberOfData = options['numberOfData']
    dim = options['numberOfDimension']
    numberOfVP = options['numberOfVP']
    cardinality = options['numberOfAlphabet']
    typeOfVP = options['typeOfVP']  # unused; kept for option-schema parity
    datas = utils.getDataInFile(utils.getDataFileName(options))
    majorPattern = [[] for _ in range(numberOfVP)]
    minorPattern = [[] for _ in range(numberOfVP)]
    for j in range(dim):
        freq = {}
        for i in range(numberOfData):
            sym = datas[i][j]
            freq[sym] = freq.get(sym, 0) + 1
        ranked = sorted(freq.items(), key=lambda kv: kv[1], reverse=True)
        for k in range(numberOfVP):
            majorPattern[k].append(ranked[k][0])
            minorPattern[k].append(ranked[-1 - k][0])
    utils.writeDataToFile(
        'vp/vp_%d_%d_%d_%s.txt' % (dim, numberOfVP, cardinality, 'major'), majorPattern)
    utils.writeDataToFile(
        'vp/vp_%d_%d_%d_%s.txt' % (dim, numberOfVP, cardinality, 'minor'), minorPattern)
def convertNDDSToCDS(options):
    """Convert NDDS vectors to CDS space using Hamming distance to VPs
    (duplicate definition).

    Each data/query vector is replaced by its vector of Hamming distances
    to every vantage point, then written to the CDS output files.

    NOTE(review): duplicates another definition of the same name in this
    file; consider deleting one copy.  Ported from Python 2 (`print`
    statements, `xrange`).
    """
    size = options['numberOfData']
    dim = options['numberOfDimension']
    distribution = options['distribution']
    cardinality = options['numberOfAlphabet']
    numberOfVP = options['numberOfVP']
    typeOfVP = options['typeOfVP']
    dataFileName = 'data/data_%d_%d_%s_%d.txt' % (size, dim, distribution, cardinality)
    queryFileName = 'query/query_%d_%d_%s_%d.txt' % (size, dim, distribution, cardinality)
    vpFileName = 'vp/vp_%d_%d_%d_%s.txt' % (dim, numberOfVP, cardinality, typeOfVP)
    cdsDataFileName = utils.getCDSDataFileName(options)
    cdsQueryFileName = utils.getCDSQueryFileName(options)
    datas = utils.getDataInFile(dataFileName)
    querys = utils.readDataFromFile(queryFileName)
    vps = utils.readDataFromFile(vpFileName)
    print(len(datas), len(querys), len(vps))
    cdsDatas = [[utils.hammingDistance(x, vp) for vp in vps] for x in datas]
    utils.writeDataToFile(cdsDataFileName, cdsDatas)
    cdsQuerys = [[utils.hammingDistance(q, vp) for vp in vps] for q in querys]
    utils.writeDataToFile(cdsQueryFileName, cdsQuerys)
    print(cdsDataFileName, cdsQueryFileName)
#!/usr/bin/python
# -*- coding:utf-8 -*-
"""Generate a 100-record query file for every data file in data/.

File names are assumed to follow data/data_<size>_<dim>_<dist>_<card>.txt;
queries are sampled (with replacement) from the corresponding data set.
Ported from Python 2 (`print` statements, `xrange`).
"""
import random
import os
import sys
import glob

import utils

if __name__ == '__main__':
    utils.createDirectory('query')
    dataFileNames = glob.glob('data/*.txt')
    for dataFileName in dataFileNames:
        # Strip extension and directory: data/data_a_b_c_d.txt -> data_a_b_c_d
        onlyFileName = dataFileName.split('.')[0].split('/')[1]
        parts = onlyFileName.split('_')
        size, dim, vptype, cardinality = parts[1], parts[2], parts[3], parts[4]
        queryFileName = 'query/query_%s_%s_%s_%s.txt' % (size, dim, vptype, cardinality)
        if os.path.exists(queryFileName):
            print('%s is exists' % (queryFileName))  # already generated; skip
            continue
        print(queryFileName)
        datas = utils.getDataInFile(dataFileName)
        # Sample 100 existing records (with replacement) as queries.
        queryDatas = [datas[random.randrange(0, int(size))] for _ in range(100)]
        utils.writeDataToFile(queryFileName, queryDatas)
utils.writeNpErrToFile('envAccu:', np.mean(envErrsNpList[1:j + 1, :], axis=0), testingLog, epoch, j) albedoList = [ x[0:-4] + '_c{0}.hdf5'.format(opt.cascadeLevel) for x in albedoNameBatch ] normalList = [x.replace('albedo', 'normal') for x in albedoList] roughList = [x.replace('albedo', 'rough') for x in albedoList] depthList = [x.replace('albedo', 'depth') for x in albedoList] envList = [x.replace('albedo', 'env') for x in albedoList] imP2List = [x.replace('albedo', 'imgPoint_b2') for x in albedoList] imP3List = [x.replace('albedo', 'imgPoint_b3') for x in albedoList] utils.writeDataToFile(albedoPreds[-1] * segBatch.expand_as(albedoBatch), albedoList) utils.writeDataToFile(normalPreds[-1] * segBatch.expand_as(normalBatch), normalList) utils.writeDataToFile(roughPreds[-1] * segBatch.expand_as(roughBatch), roughList) utils.writeDataToFile(depthPreds[-1] * segBatch.expand_as(depthBatch), depthList) utils.writeDataToFile( (2 * globalIllu2s[-1] - 1) * segBatch.expand_as(imP2Batch), imP2List) utils.writeDataToFile( (2 * globalIllu3s[-1] - 1) * segBatch.expand_as(imP3Batch), imP3List) utils.writeDataToFile(SHPreds[-1], envList) testingLog.close() # Save the error record
def generateVantagePointsWithHybridAlgorithm(options):
    """Hybrid VP generation: major-pattern seed plus synthetic bucket filling.

    Maintains a histogram `d` over distances 0..dim.  While the VP set is
    incomplete, it finds the first empty bucket i and synthesizes one
    candidate per existing VP at distance i (generateVpWithDist), keeping
    the candidate with the best calculateMany score (ties broken by the
    smaller maximum distance).  If no bucket is empty, it falls back to a
    random data point.

    NOTE(review): `one_pass` starts False and is never set True, so the
    data-scan branch is currently dead code; behavior preserved as-is.
    Fixed the first-occurrence seed count (was 0, now 1; ranking
    unaffected) and ported from Python 2 (`print`, `xrange`).
    """
    numberOfData = options['numberOfData']
    dim = options['numberOfDimension']
    numberOfVP = options['numberOfVP']
    cardinality = options['numberOfAlphabet']
    typeOfVP = options['typeOfVP']
    datas = utils.getDataInFile(utils.getDataFileName(options))
    # Seed: per-dimension most frequent symbol.
    seed = []
    for j in range(dim):
        counts = {}
        for i in range(numberOfData):
            sym = datas[i][j]
            counts[sym] = counts.get(sym, 0) + 1
        seed.append(max(counts.items(), key=lambda kv: kv[1])[0])
    vps = [seed]
    d = [0 for _ in range(dim + 1)]  # histogram over distances 0..dim
    one_pass = False
    while len(vps) < numberOfVP:
        print(len(vps))  # progress
        if one_pass:
            # Dead branch (see docstring): add every data point whose
            # distances all fall in buckets used at most once.
            for i in range(len(datas)):
                skip = False
                for vp in vps:
                    if d[utils.hammingDistance(datas[i], vp)] > 1:
                        skip = True
                    if datas[i] == vp:
                        skip = True
                if skip:
                    continue
                for vp in vps:
                    d[utils.hammingDistance(datas[i], vp)] += 1
                vps.append(datas[i])
            one_pass = False
        else:
            change = False
            for i in range(dim + 1):
                if d[i] == 0:
                    change = True
                    ans, ans_vp = -1, ''
                    fx = 987654321  # sentinel "infinity" for the tie-breaker
                    for j in range(len(vps)):
                        cur_vp = generateVpWithDist(dim, cardinality, vps[j], i)
                        vps.append(cur_vp)  # trial append
                        cur, dists = calculateMany(vps)
                        # Prefer higher score; on ties, the smaller max distance.
                        if cur > ans or (cur == ans and max(dists) < fx):
                            ans, ans_vp = cur, cur_vp
                            fx = max(dists)
                        vps = vps[:-1]  # roll back the trial
                    for j in range(len(vps)):
                        d[utils.hammingDistance(vps[j], ans_vp)] += 1
                    vps.append(ans_vp)
                    break
            if not change:
                # Every bucket occupied: fall back to a random data point.
                vps.append(datas[random.randrange(0, numberOfData)])
    utils.writeDataToFile(
        'vp/vp_%d_%d_%d_%s.txt' % (dim, numberOfVP, cardinality, typeOfVP), vps)
normalName, isGama=False) for n in range(0, len(roughPreds)): roughName = [] roughName.append(imgName.replace('input', 'rough_%d' % n)) utils.writeImageToFile(0.5 * (roughPreds[n] + 1) * segBatch.expand_as(roughPreds[n]), roughName, isGama=False) for n in range(0, len(depthPreds)): depthName, depthImName = [], [] depthName.append( imgName.replace('input', 'depth_%d' % n).replace('png', 'hdf5')) utils.writeDataToFile(depthPreds[n], depthName) depthImName.append(imgName.replace('input', 'depth_%d' % n)) depthOut = 1 / torch.clamp(depthPreds[n], 1e-6, 10) depthOut = (depthOut - 0.25) / 0.8 utils.writeImageToFile(depthOut * segBatch.expand_as(depthPreds[n]), depthImName, isGama=False) for n in range(0, len(renderedEnvs)): envImName = [] envImName.append(imgName.replace('input', 'renderedEnv_%d' % n)) utils.writeImageToFile(renderedEnvs[n] * segBatch.expand_as(renderedEnvs[n]), envImName, isGama=True)
def convertNDDSToCDS(options):
    """Convert NDDS vectors to CDS space with a frequency-weighted distance.

    Unlike the plain Hamming variant of this function, a mismatching
    coordinate costs 1.0 and a matching coordinate costs the rarity of the
    shared symbol, (1 - relative frequency) / dim; the sum is normalized
    by dim.  Output goes to the *geh-suffixed cds_data/ and cds_query/
    files.  NOTE(review): 'geh' presumably names this weighted-Hamming
    scheme — confirm against the project's documentation.

    Ported from Python 2 (`print` statements, `xrange`); commented-out
    file-name code removed.
    """
    size = options['numberOfData']
    dim = options['numberOfDimension']
    distribution = options['distribution']
    cardinality = options['numberOfAlphabet']
    numberOfVP = options['numberOfVP']
    typeOfVP = options['typeOfVP']
    dataFileName = 'data/data_%d_%d_%s_%d.txt' % (size, dim, distribution, cardinality)
    queryFileName = 'query/query_%d_%d_%s_%d.txt' % (size, dim, distribution, cardinality)
    vpFileName = 'vp/vp_%d_%d_%d_%s.txt' % (dim, numberOfVP, cardinality, typeOfVP)
    cdsDataFileName = 'cds_data/data_%d_%d_%d_%s_%d_%sgeh.txt' % (
        size, dim, numberOfVP, distribution, cardinality, typeOfVP)
    cdsQueryFileName = 'cds_query/query_%d_%d_%d_%s_%d_%sgeh.txt' % (
        size, dim, numberOfVP, distribution, cardinality, typeOfVP)
    datas = utils.getDataInFile(dataFileName)
    querys = utils.readDataFromFile(queryFileName)
    vps = utils.readDataFromFile(vpFileName)
    print(len(datas), len(querys), len(vps))
    # Per-dimension rarity table: 1 - relative frequency of each symbol.
    d = [{} for _ in range(dim)]
    for row in datas:
        for j in range(dim):
            d[j][row[j]] = d[j].get(row[j], 0) + 1
    for j in range(dim):
        for key in d[j]:
            d[j][key] = 1.0 - float(d[j][key]) / float(len(datas))

    def geh(a, b):
        # Mismatch costs 1; match costs the shared symbol's rarity / dim.
        ret = 0.0
        for i in range(len(a)):
            if a[i] != b[i]:
                ret += 1.0
            else:
                ret += d[i][a[i]] / float(dim)
        return ret / float(dim)

    cdsDatas = [[geh(x, vp) for vp in vps] for x in datas]
    utils.writeDataToFile(cdsDataFileName, cdsDatas)
    cdsQuerys = [[geh(q, vp) for vp in vps] for q in querys]
    utils.writeDataToFile(cdsQueryFileName, cdsQuerys)
    print(cdsDataFileName, cdsQueryFileName)
def fetch_subjects():
    """Download the subject list for the module-level course id and save it.

    Builds the request from the module-level `id_course`, `limit`, and
    `skip` values, then writes the decoded JSON response to
    data/subjects.json.
    """
    request_url = "https://minder.vn/api/subjects/subjects?id_course={0}&limit={1}&skip={2}".format(
        id_course, limit, skip)
    with urllib.request.urlopen(request_url) as url:
        payload = json.loads(url.read().decode())
        writeDataToFile('data/subjects.json', payload)
def main(arguments):
    """Entry point: build the MetaboLights compound flags report.

    Parses CLI arguments, configures per-run logging under
    <workingDirectory>/logs/, then for each compound from
    utils.fetchMetaboLightsCompoundsList() inspects its exported
    <compound>_data.json file, records a rating/flags entry in the
    module-level globalReport, and finally writes the report to
    <ftp>/ml_flags.json.

    Fixed: the per-compound handler used a bare `except:`, which also
    swallows SystemExit/KeyboardInterrupt and hides the traceback; it now
    catches Exception and logs the traceback, preserving the best-effort
    loop behavior.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-l', '--launch_directory', action=readable_dir, default="")
    parser.add_argument(
        '-w', '--destination', action=readable_dir,
        help="Output directory",
        default="/nfs/www-prod/web_hx2/cm/metabolights/prod/reference/")
    parser.add_argument(
        '-f', '--ftp', action=readable_dir,
        default="/ebi/ftp/pub/databases/metabolights/compounds/",
        help="FTP directory")
    args = parser.parse_args(arguments)

    global workingDirectory
    global destinationDirectory
    global ftp
    global globalReport
    workingDirectory = args.launch_directory
    destinationDirectory = args.destination
    ftp = args.ftp
    if workingDirectory == "":
        workingDirectory = os.getcwd()

    # Log file configuration: one directory per run, random file suffix.
    st = utils.getDateAndTime()
    randomInt = str(randint(1, 1000))
    logDirectory = workingDirectory + "/logs/exporter_" + st
    if not os.path.exists(logDirectory):
        os.makedirs(logDirectory)
    logging.basicConfig(filename=logDirectory + "/log_" + randomInt + ".log",
                        level=logging.DEBUG)
    utils.init(logging)
    logging.info("-----------------------------------------------")
    logging.info('# Run started -' + utils.getDateAndTime())

    requestCompoundsList = utils.fetchMetaboLightsCompoundsList()
    for compound in requestCompoundsList:
        logging.info("-----------------------------------------------")
        try:
            logging.info("Exporting: " + compound)
            tempCompoundReport = {
                "rating": 5,
                "flags": {
                    "hasInchiKey": False,
                    "hasLiterature": False,
                    "hasReactions": False,
                    "hasNMR": False,
                    "hasSpecies": False,
                    "hasMS": False,
                    "hasPathways": False,
                    "has3d": False
                }
            }
            filePath = destinationDirectory + compound + "/" + compound + "_data.json"
            tempCompoundReport = checkIfFileEmptyOrNotExist(
                filePath, tempCompoundReport)
            if tempCompoundReport["rating"] != 0:
                tempCompoundReport = setFlags(filePath, tempCompoundReport)
            else:
                logging.warning("WARNING: Missing data - " + compound)
            globalReport[compound] = tempCompoundReport
        except Exception:
            # Best-effort per compound: record the failure and move on,
            # but keep the traceback in the log.
            logging.exception("Error: " + compound)
    utils.writeDataToFile(ftp + "ml_flags.json", globalReport)