def beta(cluster, data):
    # cluster: cluster centers
    # data: data points (pandas DataFrame)
    distance = euclidean_distances(data, cluster)
    resultedClassLabel = []
    for i in range(len(data)):
        idx, val = list_min(distance[i])
        resultedClassLabel.append(idx)
    result = 0.0
    for label in unique(resultedClassLabel):
        indexList = multiIndexOf(resultedClassLabel, label)
        elementCount = len(indexList)
        if elementCount <= 1:
            continue
        _data = [data.loc[j] for j in indexList]
        _distance = euclidean_distances(_data, _data) ** 2
        result += sum(sum(_distance)) / (elementCount * (elementCount - 1))
    return '{:0,.2f}'.format(result / len(unique(resultedClassLabel)))
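# The helpers used above (and in several snippets below) are assumed rather than
# shown. A minimal sketch of plausible implementations (hypothetical, not the
# original utils module): an order-preserving unique(), a list_min() returning
# (index, value), and a multiIndexOf() returning every position of a value.
# Note that other snippets below expect different unique() signatures (e.g. an
# index-returning variant for tensors), so this sketch is illustrative only.
def unique(seq):
    """Return the distinct items of seq, preserving first-seen order."""
    seen = set()
    return [x for x in seq if not (x in seen or seen.add(x))]

def list_min(seq):
    """Return (index, value) of the smallest element of seq."""
    idx = min(range(len(seq)), key=lambda i: seq[i])
    return idx, seq[idx]

def multiIndexOf(seq, value):
    """Return every index at which value occurs in seq."""
    return [i for i, x in enumerate(seq) if x == value]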
def read_data(x, y, split):
    with h5py.File(split, "r") as f:
        train_idx = utils.decode(f["train"][...])
        val_idx = utils.decode(f["val"][...])
        test_idx = utils.decode(f["test"][...])
        all_idx = np.concatenate([train_idx, val_idx, test_idx], axis=0)
    with h5py.File(x, "r") as f:
        idx, mat = utils.unique(utils.decode(f["protein_id"][...]), f["mat"])
        assert np.all(np.in1d(all_idx, idx))
        idx_mapper = utils.get_idx_mapper(idx)
        x_train = mat[idx_mapper(train_idx)]
        x_val = mat[idx_mapper(val_idx)]
        x_test = mat[idx_mapper(test_idx)]
    with h5py.File(y, "r") as f:
        idx, mat = utils.unique(utils.decode(f["protein_id"][...]), f["mat"])
        assert np.all(np.in1d(all_idx, idx))
        idx_mapper = utils.get_idx_mapper(idx)
        y_train = mat[idx_mapper(train_idx)]
        y_val = mat[idx_mapper(val_idx)]
        y_test = mat[idx_mapper(test_idx)]
    return utils.DataDict([
        ("x", x_train), ("y", y_train), ("protein_id", train_idx)
    ]), utils.DataDict([
        ("x", x_val), ("y", y_val), ("protein_id", val_idx)
    ]), utils.DataDict([
        ("x", x_test), ("y", y_test), ("protein_id", test_idx)
    ])
def __init__(self, split): self.train = split == 'train' self.loadinfo(default.groundtruth_file) fn = default.split_file with open(fn, 'r') as f: data = json.load(f) print 'loaded', fn self.term_list = data['term_list'] self.num_labels = len(self.term_list) prefix = split self.smp_idxs, self.labels, self.uncertain_labels = \ data['%s_lesion_idxs'%prefix], data['%s_relevant_labels'%prefix], \ data['%s_uncertain_labels'%prefix] if self.train: self.irrelevant_labels = data['train_irrelevant_labels'] self.num_smp = len(self.smp_idxs) if not hasattr(default, 'ontology'): default.ontology = load_ontology_from_xlsfile(default.ontology_file) self.ontology = default.ontology self.gen_parents_list() self.gen_children_list() self.gen_exclusive_list() if config.TRAIN.TEXT_MINED_LABEL == 'RUI' and self.train: self.labels = [r+u+i for r,u,i in zip(self.labels, self.uncertain_labels, self.irrelevant_labels)] elif config.TRAIN.TEXT_MINED_LABEL == 'RU' and self.train: self.labels = [r+u for r,u in zip(self.labels, self.uncertain_labels)] self.labels = [unique(l) for l in self.labels] self.uncertain_labels = [unique(u) for u in self.uncertain_labels] terms_all = [d['term'] for d in self.ontology] self.term_class = [self.ontology[terms_all.index(t)]['class'] for t in self.term_list] print '>>>', len(self.smp_idxs), prefix, 'samples,', keep = [i for i in range(len(self.smp_idxs)) if (not self.noisy[self.smp_idxs[i]]) and len(self.labels[i]) > 0] self.smp_idxs = [self.smp_idxs[i] for i in keep] self.num_smp = len(self.smp_idxs) print self.num_smp, 'after removing noisy and empty ones:' print self.num_labels, 'labels,', self.labels = [self.labels[i] for i in keep] self.uncertain_labels = [self.uncertain_labels[i] for i in keep] print '%d relevant cases,' % np.hstack(self.labels).shape[0], print '%d uncertain cases.' % np.hstack(self.uncertain_labels).shape[0] if default.generate_features_all: self.smp_idxs = range(len(self.filenames)) self.labels = [[0] for _ in self.smp_idxs] self.uncertain_labels = [[0] for _ in self.smp_idxs] print 'Fake evaluation, generating features for all 32735 lesions' all_labels = [lb for lbs in self.labels for lb in lbs] self.cls_sz = np.array([all_labels.count(cls) for cls in range(self.num_labels)], dtype=np.float32) self.gen_class_weights() print
def __init__(self, table, header):
    self.branches = {}
    self.table = table
    self.node_type = None
    self.split_index = None
    self.leaf_class = None
    self.header = header
    classes = [x[-1] for x in table]
    c_types = u.unique(classes)
    if len(c_types) == 1:
        self.node_type = LEAF
        self.leaf_class = c_types[0]
    else:
        self.split_index = max_gain(table, header)
        if self.split_index != -1:
            split_vals = u.unique(table, col=self.split_index)
            self.node_type = SPLIT
            branch_tabs = [[y for y in table if y[self.split_index] == x]
                           for x in split_vals]
            for i, bran in enumerate(branch_tabs):
                self.branches[split_vals[i]] = TreeNode(bran, header)
        else:
            self.node_type = LEAF
            self.leaf_class = u.majority_vote(table)
def update(self, nodes):
    logging.debug("Adding nodes %s",
                  ' '.join(["%032x" % (x.int_id) for x in nodes]))
    self.cw.extend(nodes)
    self.ccw.extend(nodes)
    self.cw = unique(self.cw)
    self.ccw = unique(self.ccw)
    self.cw.sort(key=self.cw_distance)
    self.ccw.sort(key=self.ccw_distance)
    self.cw = self.cw[:self.size]
    self.ccw = self.ccw[:self.size]
def estimate_parameters(series, min_size_series=50, discrete=False):
    """
    Apply Clauset et al.'s method to find the best-fit values of xmin and Alpha.

    **Parameters**

        series : series of data to be fit.

        min_size_series : minimum possible size of the distribution to which a
            power-law fit will be attempted. Fitting a power law to a very small
            series would give biased results where a power law may appear to be
            a good fit even when the data are not drawn from a power-law
            distribution. The default value is 50, as suggested in the paper.

        discrete : boolean, whether to treat the series as discrete or
            continuous. Default value is False.

    **Returns**

        Tuple of (estimated xmin, estimated Alpha, minimum KS statistic).
    """
    sorted_series = sorted(series)
    xmin_candidates = []
    x_prev = sorted_series[0]
    xmin_candidates.append(x_prev)
    for x in sorted_series:
        if x > x_prev:
            x_prev = x
            xmin_candidates.append(x_prev)

    ks_statistics_min = float('inf')
    xmin_result = 0
    Alpha_result = 2
    for xmin in xmin_candidates[:-1 * (min_size_series - 1)]:
        data = [x for x in sorted_series if x >= xmin]
        estimated_Alpha = estimate_scaling_parameter(data, xmin)
        if discrete:
            Px = [zeta(estimated_Alpha, x) / zeta(estimated_Alpha, xmin)
                  for x in unique(data)]
        else:
            Px = [pow(float(x) / xmin, 1 - estimated_Alpha)
                  for x in unique(data)]
        n = len(Px)
        Sx = [i[1] / n for i in frequency_distribution(data, pdf=False)]
        ks_statistics = max(abs(Sx[counter] - Px[counter])
                            for counter in range(0, n))
        if ks_statistics < ks_statistics_min:
            ks_statistics_min = ks_statistics
            xmin_result = xmin
            Alpha_result = estimated_Alpha
    return (xmin_result, Alpha_result, ks_statistics_min)
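# A minimal usage sketch (not from the original source): fitting the estimator
# to synthetic continuous data. The sample is drawn by inverse transform
# sampling from a pure power law with alpha = 2.5 and xmin = 1, so the
# recovered parameters should land near those values. The helper names used
# inside estimate_parameters (estimate_scaling_parameter, frequency_distribution,
# zeta, unique) are assumed to be importable from the same module.
import random

alpha_true, xmin_true = 2.5, 1.0
sample = [xmin_true * (1.0 - random.random()) ** (-1.0 / (alpha_true - 1.0))
          for _ in range(2000)]
xmin_hat, alpha_hat, ks = estimate_parameters(sample, min_size_series=50,
                                              discrete=False)
print("xmin=%.3f alpha=%.3f KS=%.4f" % (xmin_hat, alpha_hat, ks))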
def fit(self):
    best = 0
    last_save = 0
    log = open(self.out + '.log', 'w')
    for epoch in range(1000):
        print('\n----------\nEPOCH %d\n----------' % epoch)
        self.policy_gradient()
        seqs = self.agent.sample(self.n_samples)
        ix = utils.unique(seqs)
        smiles = [self.agent.voc.decode(s) for s in seqs[ix]]
        scores = self.env(smiles, is_smiles=True)
        desire = (scores.DESIRE).sum() / self.n_samples
        score = scores[self.env.keys].values.mean()
        valid = scores.VALID.mean()
        if best <= score:
            torch.save(self.agent.state_dict(), self.out + '.pkg')
            best = score
            last_save = epoch
        print("Epoch: %d average: %.4f valid: %.4f unique: %.4f" %
              (epoch, score, valid, desire), file=log)
        for i, smile in enumerate(smiles):
            score = "\t".join(['%0.3f' % s for s in scores.values[i]])
            print('%s\t%s' % (score, smile), file=log)
        if epoch - last_save > 100:
            break
        for param_group in self.agent.optim.param_groups:
            param_group['lr'] *= (1 - 0.01)
    log.close()
def framesToFrameRange(frames, sort=True, zfill=0, compress=False):
    """
    Converts an iterator of frames into a
    :class:`fileseq.framerange.FrameRange`.

    :type frames: iterable
    :param frames: sequence of frames to process
    :type sort: bool
    :param sort: sort the sequence before processing
    :type zfill: int
    :param zfill: width for zero padding
    :type compress: bool
    :param compress: remove any duplicates before processing
    :rtype: str
    """
    if compress:
        frames = unique(set(), frames)
    frames = list(frames)
    if not frames:
        return ''
    if len(frames) == 1:
        return pad(frames[0], zfill)
    if sort:
        frames.sort()
    return ','.join(FrameSet.framesToFrameRanges(frames, zfill))
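# A small usage sketch (assuming this is the staticmethod exposed on
# fileseq.FrameSet, as the docstring suggests): contiguous runs collapse into
# ranges, remaining frames are listed individually, and zfill pads the output.
print(FrameSet.framesToFrameRange([5, 1, 2, 3, 10]))   # expected: "1-3,5,10"
print(FrameSet.framesToFrameRange([7], zfill=4))       # expected: "0007"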
def _import_csv(self):
    """
    Function that reads the csv in which the target of each image is written.

    :arg self (DataHandler): instance of the class

    :return df (pandas dataframe): pandas dataframe corresponding to the
        targets. Each column corresponds to a pathology, each row corresponds
        to an image.
    """
    try:
        df = pd.read_csv(self.label_full_path)
    except IOError as e:
        raise e
    df = df.iloc[:, [0, 1]]
    split_data = df['Finding Labels'].str.split('|')
    list1 = split_data.to_list()
    flat_list = [item for sublist in list1 for item in sublist]
    unique_list = unique(flat_list)
    df = pd.concat([df, pd.DataFrame(columns=unique_list)], sort=False)
    for value in unique_list:
        bool_value = df['Finding Labels'].str.contains(value)
        df[value] = bool_value.astype(int)
    df = df.drop(labels=['Finding Labels'], axis=1)
    return df
def index():
    # initialize FlaskForm from flask_wtf
    if request.method == "GET":
        # make form
        form = UsernamesForm()
        return render_template('index.html', data=json.dumps(usernames),
                               recs=None, form=form)
    if request.method == "POST":
        # make form
        form = UsernamesForm(request.form)
        # validate form input
        if form.validate_on_submit():
            # if valid, get recommendations
            query = form.username_input.data
            follows = [x.strip() for x in query.split(",")
                       if x.strip() in usernames]
            follows = unique(follows)
            query = "+".join(follows)
            return redirect(url_for('results', query=query))
        else:
            # flash error message
            flash('Enter a valid username')
            return redirect(url_for('index'))
def gen_exclusive_list(self):
    if hasattr(default, 'exclusive_list'):
        self.exclusive_list = default.exclusive_list
        return
    self.exclusive_list = []
    all_d_terms = [t['term'] for t in self.ontology]
    for p in range(self.num_labels):
        idx = all_d_terms.index(self.term_list[p])
        self.exclusive_list.append([self.term_list.index(ex)
                                    for ex in self.ontology[idx]['exclusive']
                                    if ex in self.term_list])
    # if labels A and B are exclusive, any child of A and any child of B
    # should also be exclusive
    while True:
        flag = False
        for p in range(self.num_labels):
            cur_ex = self.exclusive_list[p]
            next_ex = cur_ex[:]
            for ex in cur_ex:
                next_ex += self.all_children_list[ex]
            for parent in self.parent_list[p]:
                next_ex += self.exclusive_list[parent]
            next_ex = unique(next_ex)
            flag = flag or (set(next_ex) != set(cur_ex))
            self.exclusive_list[p] = next_ex
        if not flag:
            break
    print '%d mutually exclusive pairs extracted' % (
        len([p1 for p in self.exclusive_list for p1 in p]) / 2)
    default.exclusive_list = self.exclusive_list
def get_tags():
    g.user.reload()  # to capture tags that have just been added
    base_tags = [t["title"] for t in Tags.objects.get_base_tags()]
    # using unique here to maintain order by popularity for base tags
    return filter(None, unique(base_tags + g.user.tags))
def intersectConvexPolygons(polygon1, polygon2):
    ps = []
    for i in range(0, len(polygon1)):
        ps.append(polygon1[i])
    for i in range(0, len(polygon2)):
        ps.append(polygon2[i])
    for i in range(len(polygon1)):
        seg1 = GeneralizedSegment(
            polygon1[i],
            polygon1[(i + 1) if i + 1 < len(polygon1) else 0],
            False, False)
        for j in range(0, len(polygon2)):
            seg2 = GeneralizedSegment(
                polygon2[j],
                polygon2[j + 1 if j + 1 < len(polygon2) else 0],
                False, False)
            uk = segmentIntersection(seg1, seg2)
            if uk:
                ps.append(uk)
    ans = []
    for i in range(0, len(ps)):
        if isInConvexPolygon(ps[i], polygon1) and isInConvexPolygon(ps[i], polygon2):
            ans.append(ps[i])
    pans = [ans[0]]
    pans.extend(utils.sort(ans[1:], ByPolarAngleAroundCenterComparator(ans[0])))
    ans = utils.unique(pans)
    return ans
def main():
    cmd_args = parse_args()
    idx_list = []
    for input in cmd_args.input:
        with h5py.File(input, "r") as f:
            idx = f["protein_id"][...]
            idx_list.append(utils.unique(idx))
    common_idx = functools.reduce(np.intersect1d, idx_list)
    if cmd_args.seed is not None:
        np.random.seed(cmd_args.seed)
    kf = KFold(n_splits=cmd_args.k_fold, shuffle=True,
               random_state=cmd_args.seed)
    if not os.path.exists(cmd_args.output_path):
        os.makedirs(cmd_args.output_path)
    current_fold = 0
    for train_idx, test_idx in kf.split(common_idx):
        with h5py.File(os.path.join(cmd_args.output_path,
                                    "fold%d.h5" % current_fold), "w") as f:
            val_size = np.round(len(train_idx) * cmd_args.val_frac).astype(np.int)
            val_idx = np.random.choice(train_idx, val_size, replace=False)
            train_idx = np.setdiff1d(train_idx, val_idx)
            f.create_dataset("train", data=common_idx[train_idx])
            f.create_dataset("val", data=common_idx[val_idx])
            f.create_dataset("test", data=common_idx[test_idx])
        current_fold += 1
def relabeling(actualClass, predictedClass):
    # actualClass    = reference cluster label vector.
    # predictedClass = query cluster label vector.
    # mappedClass    = query vector after mapping each predicted label onto the
    #                  reference label it most often co-occurs with.
    mappedClass = list(predictedClass)
    minLabel = list_min(predictedClass)
    maxLabel = list_max(predictedClass)
    for i in range(minLabel, maxLabel + 1):
        a = multiIndexOf(predictedClass, i)
        b = [actualClass[idx] for idx in a]
        x = unique(b)
        v = i
        maxm = -9
        for t in x:
            y = len(multiIndexOf(b, t))
            if y > maxm:
                v = t
                maxm = y
        for idx in a:
            mappedClass[idx] = v
    return mappedClass
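# A minimal usage sketch of the corrected version above (not from the original
# source), assuming list_min / list_max return the smallest / largest label
# value and multiIndexOf returns every position of a value. Query labels
# {0, 1} are renamed so that each matches the reference label it most
# frequently overlaps with.
reference = [2, 2, 2, 5, 5, 5]
query = [0, 0, 1, 1, 1, 1]
print(relabeling(reference, query))  # expected: [2, 2, 5, 5, 5, 5]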
def policy_gradient(self, crover=None, memory=None, epsilon=None): seqs = [] start = time.time() for _ in range(self.replay): seq = self.agent.evolve1(self.batch_size, epsilon=epsilon, crover=crover, mutate=self.prior) seqs.append(seq) t1 = time.time() seqs = torch.cat(seqs, dim=0) if memory is not None: mems = [memory, seqs] seqs = torch.cat(mems) smiles = np.array([self.agent.voc.decode(s) for s in seqs]) # smiles = np.array(utils.canonicalize_list(smiles)) ix = utils.unique(np.array([[s] for s in smiles])) smiles = smiles[ix] seqs = seqs[torch.LongTensor(ix).to(utils.dev)] scores = self.env.calc_reward(smiles, self.scheme) if memory is not None: scores[:len(memory), 0] = 1 ix = scores[:, 0].argsort()[-self.batch_size * 4:] seqs, scores = seqs[ix, :], scores[ix, :] t2 = time.time() ds = TensorDataset(seqs, torch.Tensor(scores).to(utils.dev)) loader = DataLoader(ds, batch_size=self.n_samples, shuffle=True) self.agent.PGLoss(loader) t3 = time.time() print(t1 - start, t2 - t1, t3 - t2)
def policy_gradient(self):
    seqs = []
    for _ in range(self.replay):
        seq = self.agent.sample(self.batch_size)
        seqs.append(seq)
    seqs = torch.cat(seqs, dim=0)
    ix = utils.unique(seqs)
    seqs = seqs[ix]
    smiles = [self.agent.voc.decode(s) for s in seqs]
    scores = self.env.calc_reward(smiles, self.scheme)[:, 0]

    ds = TensorDataset(seqs, torch.Tensor(scores - self.beta).to(utils.dev))
    loader = DataLoader(ds, batch_size=self.n_samples, shuffle=True)

    for seq, score in loader:
        # Calculate gradients and make an update to the network weights
        self.agent.optim.zero_grad()
        prior_likelihood = self.prior.likelihood(seq).sum(dim=1)
        agent_likelihood = self.agent.likelihood(seq).sum(dim=1)
        augmented_likelihood = prior_likelihood + self.epsilon * score
        loss = torch.pow((augmented_likelihood - agent_likelihood), 2)

        # Calculate loss
        loss = loss.mean()

        # Add regularizer that penalizes high likelihood for the entire sequence
        loss_p = -(1 / agent_likelihood).mean()
        loss += 5 * 1e3 * loss_p

        loss.backward()
        self.agent.optim.step()
def bulletin_ac(request, boardname): def iter_results(results): if results: for r in results: yield '%s\n' % r if not request.GET.get('q'): return HttpResponse(mimetype='text/plain') # q가 없으면 q = request.GET.get('q') limit = request.GET.get('limit', 15) try: limit = int(limit) except ValueError: return HttpResponseBadRequest() if boardname == 'scrapbook': target = Like.objects.filter(Q(isHiddenUser=False) & Q(user=request.user)) acData = map(lambda b: b.bulletin.title, target.filter(bulletin__title__contains=q)[:limit]) # 타이틀 검색 acData = acData + map(lambda b: b.bulletin.writer.first_name, target.filter(bulletin__writer__first_name__contains=q)[:limit]) # 글쓴 유저 검색 elif boardname == 'my': target = Bulletin.bulletIns.filter(Q(isHiddenUser=False) & Q(writer=request.user)) acData = map(lambda b: b.title, target.filter(title__contains=q)[:limit]) # 타이틀 검색 else: board = Board.objects.get(name=boardname) target = Bulletin.bulletIns.filter(Q(isHiddenUser=False) & Q(board=board)) acData = map(lambda b: b.title, target.filter(title__contains=q)[:limit]) # 타이틀 검색 acData = acData + map(lambda b: b.writer.first_name, target.filter(writer__first_name__contains=q)[:limit]) # 글쓴 유저 검색 acData = unique(acData) return HttpResponse(iter_results(acData), mimetype='text/plain')
def main(filepath):
    emotions = []
    if not os.path.isfile(filepath):
        print("File path {} does not exist. Exiting...".format(filepath))
        sys.exit()
    # make it txt
    with open(filepath) as fp:
        ln_count = 0
        blob = []
        for line in fp:
            ln_count += 1
            if ln_count % 10 == 0:
                emotions.append(emotion.get(blob))
                blob = []
            else:
                blob.append(unicode(line, "utf-8"))
            tokens = nlp.preprocess(line)
            tokens = utils.unique(tokens)
            print(tokens)
            sel.getImage(' '.join(tokens))
            image.join(line, ln_count)
        if len(blob) != 0:
            emotions.append(emotion.get(blob))
    mood = emotion.process(emotions)
    print(mood)
    song = music.getSong(mood)
    song = "./music/" + mood + "/" + song
    video.generate(song, "Awesomevideo", "mp4")
def main(voc_file='data/Voc', restore_model_from='data/Prior.ckpt',
         output_file='data/Prior_10k.smi', sample_size=10000):
    voc = Vocabulary(init_from_file=voc_file)

    print("Setting up networks")
    Agent = RNN(voc)
    if torch.cuda.is_available():
        print("Cuda available, loading prior & agent")
        Agent.rnn.load_state_dict(torch.load(restore_model_from))
    else:
        raise RuntimeError('Cuda not available')

    SMILES = []
    for n in tqdm(range(sample_size // 100), total=sample_size // 100):
        # Sample from Agent
        seqs, agent_likelihood, entropy = Agent.sample(100)
        # Remove duplicates, i.e. only consider unique seqs
        unique_idxs = unique(seqs)
        seqs = seqs[unique_idxs]
        agent_likelihood = agent_likelihood[unique_idxs]
        entropy = entropy[unique_idxs]
        smiles = seq_to_smiles(seqs, voc)
        SMILES += smiles

    if not os.path.exists(os.path.dirname(output_file)):
        os.makedirs(os.path.dirname(output_file))
    with open(output_file, "wt") as f:
        [f.write(smi + '\n') for smi in SMILES]
    return
def getrights(fp, args): user = args.getlinstr('user', fp.accesslevelname) if user == '.': user = fp.accesslevelname try: access.raiseifnotformeduser(user) except access.AccessLevelError: return "Malformed user!" extra = [] if fp.type == 'irc': for c in fp.server.channels: try: extra += fp.channelrights('='.join(user.split('=')[:-2]), c) except IndexError: pass if 'base' not in args.lin or args.getlinstr('rights', ''): r = access.fullrights(fp, access.getrights(fp.server, user) + extra) else: r = access.getrights(fp.server, user) if args.getlinstr('rights', ''): hasrights = True for right in args.getlinstr('rights', '').split(' '): if right not in r: hasrights = False return "Yes" if hasrights else "No" return user + ' -- ' + utils.ltos(sorted( utils.unique(r, ), key=lambda x: x.strip('#-')), '; ')
def genCFile(vecs, hfile):
    res = """\
#include "{0}"
#include <stddef.h>
#include <math.h>\n\n"""
    res = res.format(hfile)
    generics = []
    particulars = []
    for grp in sortByType(vecs):
        funcs = []
        defs = []
        for g in grp:
            pfs = g.getParticularFuncs()
            if pfs:
                d = [f.getBody(g.getType(), g.getScalar(), g.getSize())
                     for f in pfs]
                defs.append("\n\n".join(d))
            funcs.extend(g.getGenericFuncs())
        particulars.append("\n\n".join(defs))
        generics.append("\n\n".join(
            [f.getBody(grp[0].getScalar()) for f in unique(funcs)]))
    generics = "\n\n".join(generics)
    particulars = "\n\n".join(particulars)
    res += "\n\n".join([generics, particulars])
    return res
def merge(acc, x):
    for key, val in x.items():
        if key in acc:
            # conflict, merge children
            val = unique(acc[key] + val)
        acc[key] = val
    return acc
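# A minimal usage sketch (not from the original source), assuming unique()
# preserves order while dropping duplicates. merge() folds one mapping of
# lists into an accumulator, unioning the children when a key appears in both.
from functools import reduce

trees = [{"a": [1, 2]}, {"a": [2, 3], "b": [4]}]
print(reduce(merge, trees, {}))  # expected: {'a': [1, 2, 3], 'b': [4]}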
def __init__(self, table, header, first=True, full_table=None): self.branches = {} self.table = table self.node_type = None self.split_index = None self.leaf_class = None self.header = header # to initialize, append all ?? to full table if first: full_table = [] for att_index, _ in enumerate(header[:-1]): full_table.append( utils.unique([row[att_index] for row in table])) # get list of all class values classes = [x[-1] for x in table] c_types = utils.unique(classes) # if only one class, add leaf node if len(c_types) == 1: ut = utils.unique_table(self.table) self.node_type = LEAF self.leaf_class = c_types[0] # otherwise, use entropy to determine attribute index to split else: self.split_index = max_gain(table, header) # max_gain returns -1 if there is only one attr value in the current table if self.split_index != -1: # split on index with greatest information gain, then iterate over each attr value split_vals = utils.unique(table, col=self.split_index) self.node_type = SPLIT branch_tabs = [[y for y in table if y[self.split_index] == x] for x in split_vals] for i, branch in enumerate(branch_tabs): self.branches[split_vals[i]] = TreeNode( branch, header, first=False, full_table=full_table) else: # if only one attribute value left, create leaf node self.node_type = LEAF self.leaf_class = utils.majority_vote(table) ut = utils.unique_table(self.table)
def load(self, fpath): from exprparser import parse with open(os.path.join(config.input_directory, fpath), "rb") as f: reader = csv.reader(f) lines = skip_comment_cells(strip_rows(reader)) header = lines.next() self.expressions = [parse(s, autovariables=True) for s in header] table = [] for line in lines: if any(value == "" for value in line): raise Exception("empty cell found in %s" % fpath) table.append([eval(value) for value in line]) ndim = len(header) unique_last_d, dupe_last_d = unique_duplicate(table.pop(0)) if dupe_last_d: print( "Duplicate column header value(s) (for '%s') in '%s': %s" % (header[-1], fpath, ", ".join(str(v) for v in dupe_last_d)) ) raise Exception( "bad alignment data in '%s': found %d " "duplicate column header value(s)" % (fpath, len(dupe_last_d)) ) # strip the ndim-1 first columns headers = [[line.pop(0) for line in table] for _ in range(ndim - 1)] possible_values = [list(unique(values)) for values in headers] if ndim > 1: # having duplicate values is normal when there are more than 2 # dimensions but we need to test whether there are duplicates of # combinations. dupe_combos = list(duplicates(zip(*headers))) if dupe_combos: print("Duplicate row header value(s) in '%s':" % fpath) print(PrettyTable(dupe_combos)) raise Exception( "bad alignment data in '%s': found %d " "duplicate row header value(s)" % (fpath, len(dupe_combos)) ) possible_values.append(unique_last_d) self.possible_values = possible_values self.probabilities = list(chain.from_iterable(table)) num_possible_values = prod(len(values) for values in possible_values) if len(self.probabilities) != num_possible_values: raise Exception( "incoherent alignment data in '%s': %d data cells " "found while it should be %d based on the number " "of possible values in headers (%s)" % ( fpath, len(self.probabilities), num_possible_values, " * ".join(str(len(values)) for values in possible_values), ) )
def pl_resolve(ci, cj):
    clauses = []
    for di in disjuncts(ci):
        for dj in disjuncts(cj):
            if di == ~dj or ~di == dj:
                dnew = unique(removeall(di, disjuncts(ci)) +
                              removeall(dj, disjuncts(cj)))
                clauses.append(associate('|', dnew))
    return clauses
def pl_resolve(ci, cj):
    """Return all clauses that can be obtained by resolving clauses ci and cj."""
    clauses = []
    for di in disjuncts(ci):
        for dj in disjuncts(cj):
            if di == ~dj or ~di == dj:
                dnew = unique(removeall(di, disjuncts(ci)) +
                              removeall(dj, disjuncts(cj)))
                clauses.append(associate('|', dnew))
    return clauses
def compute_meta_data(dataset, *datasets):
    datasets = [dataset] + list(datasets)

    uniq = OrderedDict()
    uniq['title_event_code'] = U.unique(datasets, column='title_event_code')
    uniq['title'] = U.unique(datasets, column='title')
    uniq['event_code'] = U.unique(datasets, column='event_code')
    uniq['event_id'] = U.unique(datasets, column='event_id')
    uniq['world'] = U.unique(datasets, column='world')
    uniq['type'] = U.unique(datasets, column='type')
    uniq['title_world'] = U.unique(datasets, column='title_world')
    uniq['title_type'] = U.unique(datasets, column='title_type')
    uniq['world_type'] = U.unique(datasets, column='world_type')

    asm_datasets = [ds.query('type == "Assessment"') for ds in datasets]
    uniq['assessment_titles'] = U.unique(asm_datasets, column='title')

    win_codes = {t: 4100 for t in uniq['title']}
    win_codes['Bird Measurer (Assessment)'] = 4110

    ref_ts = dataset['timestamp'].min()

    meta = {'win_codes': win_codes, 'ref_ts': ref_ts, **uniq}
    return U.named_tuple('Meta', **meta)
def __init__(self): self.loadinfo(default.groundtruth_file) with open(default.hand_split_file, 'r') as f: data = json.load(f) self.term_list = default.term_list self.num_cls = len(self.term_list) self.smp_idxs = [d['lesion_idx'] for d in data] self.labels = [[self.term_list.index(t) for t in d['expanded_terms'] if t in self.term_list] for d in data] self.uncertain_labels = [[] for d in data] self.smp_idxs = self.smp_idxs self.labels = self.labels self.num_smp = len(self.smp_idxs) self.labels = [unique(l) for l in self.labels] self.uncertain_labels = [unique(l) for l in self.uncertain_labels] print '>>>', len(self.smp_idxs), 'hand-labeled samples,', keep = [i for i in range(len(self.smp_idxs)) if (not self.noisy[self.smp_idxs[i]]) and len(self.labels[i]) > 0] self.smp_idxs = [self.smp_idxs[i] for i in keep] self.labels = [self.labels[i] for i in keep] self.uncertain_labels = [self.uncertain_labels[i] for i in keep] print 'num of positive labels:', np.hstack(self.labels).shape[0] print 'num of uncertain labels:', np.hstack(self.uncertain_labels).shape[0] if default.generate_features_all: self.smp_idxs = range(len(self.filenames)) self.labels = [[0] for _ in self.smp_idxs] self.uncertain_labels = [[0] for _ in self.smp_idxs] print 'Fake evaluation, generating features for all 32735 lesions' self.num_smp = len(self.smp_idxs) print self.num_smp, 'after removing noisy and empty ones:', all_labels = [lb for lbs in self.labels for lb in lbs] print self.num_cls, 'classes' self.cls_sz = np.array([all_labels.count(cls) for cls in range(self.num_cls)], dtype=np.float32) if self.num_cls < 10: print 'number of positive samples:' for cls in range(self.num_cls): print self.term_list[cls], int(self.cls_sz[cls])
def fetch_articles():
    from api import insert_or_update
    bbc = Juicer()
    articles = bbc.articles(after=str(date.today() - timedelta(3)))
    # Get related articles from the past 90 days
    after = str(date.today() - timedelta(90))
    related = [bbc.articles(reference=article, after=after, sleep=1)
               for article in articles]
    res = insert_or_update(list(unique(list(flatten(related)) + articles,
                                       key=lambda a: a['cps_id'])))
    print json.dumps(res, default=json_util.default, indent=2)
def _modified_fields(self):
    fnames = self.predictors
    if not fnames:
        return []
    fnames.insert(0, 'id')
    temp = self.entity.temp_variables
    array = self.entity.array
    length = len(array)
    fields = [(k, temp[k] if k in temp else array[k])
              for k in utils.unique(fnames)]
    return [(k, v) for k, v in fields
            if isinstance(v, np.ndarray) and v.shape == (length,)]
def entropy(table):
    '''
    Calculate the entropy of a set of instances.
    '''
    e = 0
    classes = [x[-1] for x in table]
    c_types = utils.unique(classes)
    for c in c_types:
        c_ratio = sum([1 for x in classes if x == c]) / len(classes)
        if c_ratio != 0:
            e += c_ratio * math.log(c_ratio, 2)
    return -e
def read_data(x, y, g, split): with h5py.File(g, "r") as g_file: g_idx, g = utils.decode(g_file["protein_id"][...]), \ g_file["mat_bool"][...] # Confident that g is unique if x is not None: with h5py.File(x, "r") as x_file: x_idx, x = utils.unique(utils.decode(x_file["protein_id"][...]), x_file["mat"]) else: x_idx, x = g_idx, np.eye(g.shape[0]) with h5py.File(y, "r") as y_file: y_idx, y = utils.unique(utils.decode(y_file["protein_id"][...]), y_file["mat"]) xg_idx = np.intersect1d(x_idx, g_idx) x = x[utils.get_idx_mapper(x_idx)(xg_idx)] g_extract = utils.get_idx_mapper(g_idx)(xg_idx) g = g[g_extract[:, None], g_extract] y = np.concatenate( [y, np.zeros((1, ) + y.shape[1:], dtype=y.dtype.type)], axis=0) # Fill zeros if not existing y = y[utils.get_idx_mapper(y_idx)(xg_idx)] with h5py.File(split, "r") as f: train_idx = utils.decode(f["train"][...]) val_idx = utils.decode(f["val"][...]) test_idx = utils.decode(f["test"][...]) xyg_idx = np.intersect1d(xg_idx, y_idx) assert np.all(np.in1d(train_idx, xyg_idx)) \ and np.all(np.in1d(val_idx, xyg_idx)) \ and np.all(np.in1d(test_idx, xyg_idx)) train_mask = np.in1d(xg_idx, train_idx) val_mask = np.in1d(xg_idx, val_idx) test_mask = np.in1d(xg_idx, test_idx) return utils.DataDict([ ("x", x), ("y", y), ("protein_id", xg_idx), ]), g, train_mask, val_mask, test_mask
def fit(self): best = 0 log = open(self.out + '.log', 'w') last_smiles = [] last_scores = [] interval = 250 last_save = -1 for epoch in range(10000): print('\n----------\nEPOCH %d\n----------' % epoch) if epoch < interval and self.memory is not None: self.policy_gradient(crover=None, memory=self.memory, epsilon=1e-1) else: self.policy_gradient(crover=self.crover, epsilon=self.epsilon) seqs = self.agent.sample(self.n_samples) smiles = [self.agent.voc.decode(s) for s in seqs] smiles = np.array(utils.canonicalize_list(smiles)) ix = utils.unique(np.array([[s] for s in smiles])) smiles = smiles[ix] scores = self.env(smiles, is_smiles=True) desire = (scores.DESIRE).sum() / self.n_samples if self.mean_func == 'arithmetic': score = scores[self.env.keys].values.sum( ) / self.n_samples / len(self.env.keys) else: score = scores[self.env.keys].values.prod( axis=1)**(1.0 / len(self.env.keys)) score = score.sum() / self.n_samples valid = scores.VALID.sum() / self.n_samples print("Epoch: %d average: %.4f valid: %.4f unique: %.4f" % (epoch, score, valid, desire), file=log) if best < score: torch.save(self.agent.state_dict(), self.out + '.pkg') best = score last_smiles = smiles last_scores = scores last_save = epoch if epoch % interval == 0 and epoch != 0: for i, smile in enumerate(last_smiles): score = "\t".join( ['%.3f' % s for s in last_scores.values[i]]) print('%s\t%s' % (score, smile), file=log) self.agent.load_state_dict(torch.load(self.out + '.pkg')) self.crover.load_state_dict(torch.load(self.out + '.pkg')) if epoch - last_save > interval: break log.close()
def info_gain(table, att_i):
    '''
    Calculate information gain for one attribute.
    '''
    e_start = entropy(table)
    e_new = 0
    atts = utils.unique([x[att_i] for x in table])
    t_size = len(table)
    for a in atts:
        partition = [x for x in table if x[att_i] == a]
        p_weight = len(partition) / t_size
        e_new += (entropy(partition) * p_weight)
    return e_start - e_new
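# A minimal worked example (not from the original source), assuming Python 3
# true division and an order-preserving utils.unique. Each row is
# [attribute value, class label]; splitting on column 0 separates the classes
# perfectly, so the information gain equals the starting entropy of 1 bit.
table = [['sunny', 'no'], ['sunny', 'no'], ['rain', 'yes'], ['rain', 'yes']]
print(entropy(table))       # expected: 1.0
print(info_gain(table, 0))  # expected: 1.0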
def getCurrentUserProjects(currUser):
    # print "Content-type:text/html"
    # print
    # get projects user has AT LEAST Read access to (i.e. if he is explicitly
    # declared a Writer on a project but not declared a Reader, that's allowed)
    currReadProj = packetHandler.findMemberProjects(currUser, 'Reader')
    currWriteProj = packetHandler.findMemberProjects(currUser, 'Writer')
    publicProj = packetHandler.findAllProjects(isPrivate="FALSE")
    # list of Packet OBJECTS
    currUserWriteProjects = utils.unique(currReadProj + currWriteProj + publicProj)
    if currUser == 1:
        privateProjects = packetHandler.findAllProjects("TRUE")
        currUserWriteProjects = utils.unique(privateProjects + publicProj)
    uPackets = []
    for p in currUserWriteProjects:
        uPackets.append(p.getNumber())
    return uPackets
def _modified_fields(self):
    fnames = [v.predictor for _, v in self.subprocesses
              if isinstance(v, Assignment)]
    if not fnames:
        return []
    fnames.insert(0, 'id')
    temp = self.entity.temp_variables
    array = self.entity.array
    alen = len(array)
    fields = [(k, temp[k] if k in temp else array[k])
              for k in utils.unique(fnames)]
    return [(k, v) for k, v in fields
            if isinstance(v, np.ndarray) and v.shape == (alen,)]
def main():
    subreddits = []
    for i, offset in enumerate(range(0, 500, 100)):
        print(offset)
        if i > 0:
            sleep(2)
        source = get_url('http://redditmetrics.com/top/offset/{offset}'.format(offset=offset))
        soup = BeautifulSoup(source)
        cells = soup.find_all('td', string=lambda s: s[0:3] == '/r/')
        new_subreddits = [cell.get_text()[3:] for cell in cells]
        subreddits += new_subreddits
        print(new_subreddits)
    subreddits[:] = unique(subreddits)
    print('Saving these subreddits:', subreddits)
    save_list(subreddits, 'subreddits.txt')
    print('Done')
def lint_files(filenames, pass_classes, clang_args):
    def strip_dot_slash(d):
        if d.filename.startswith('./'):
            d.filename = d.filename[2:]
        return d

    def interesting_file(d):
        if os.path.isabs(d.filename):
            return False
        if d.filename.startswith('opt'):
            return False
        return True

    def mk_task(filename):
        return (filename, pass_classes, clang_args)

    diags = []
    tasks = (mk_task(f) for f in filenames)
    if PARALLEL:
        # TODO: accept -jN and/or read process count from config
        pool = Pool(processes=4)
        try:
            for r in progressbar(pool.imap(worker, tasks),
                                 length=len(filenames)):
                diags += r
        except KeyboardInterrupt:
            pool.terminate()
            return []
    else:
        for r in progressbar(imap(worker, tasks), length=len(filenames)):
            diags += r

    diags = map(strip_dot_slash, diags)
    diags = filter(interesting_file, diags)
    diags = sorted(diags, key=lambda d: d.line_number)
    diags = sorted(diags, key=lambda d: d.filename)
    return unique(diags)
def join(self, nid):
    logging.info("%032x is Joining, Currently at %032x",
                 nid.int_id, self.node.int_id)
    next_node = self.router.route(nid, False)
    if next_node.id == self.node.id:
        nodes = self.router.get_nodes()
    else:
        try:
            nodes = WDHTClient(next_node.ip, next_node.port).join(nid)
        except TTransportException:
            logging.info("Call to %032x failed", next_node.int_id)
            self.router.remove([next_node])
            return self.join(nid)
    nodes.append(self.node)
    self.router.update([nid])
    return unique(nodes)
def packages(version_class, request):
    package_versions = (version_class.objects
                        .filter(package__public=True, publish=True)
                        .order_by('package__name', '-major', '-minor', '-micro'))
    latest_versions = unique(package_versions,
                             key=lambda v: v.package.name, sort=False)
    uri = request.build_absolute_uri()
    data = {'packages': dict(
        [(v.package.name,
          {'latest_version': '%s.%s.%s' % (v.major, v.minor, v.micro),
           'latest_url': request.build_absolute_uri(v.url()),
           'package_url': uri.replace('json/', '%s/json/' % v.package.name)})
         for v in latest_versions])}
    # Validation requires Python 2.7+
    # jsonschema.validate(data, PACKAGES_SCHEMA)
    return HttpResponse(json.dumps(data), content_type='application/json')
def display_experiment_information(self, mediadir, experiment_data): """Spawn a gui that analyses the experiment file consistency.""" mediafiles = [] # loop (dictlist) sections -> gather all wanted info section_rows = [] for section in experiment_data: section_rows.append(["blue", section["name"]]) # experiment group information if "experiment_group" in section: section_rows.append(["bold", "\texperiment_group: " + section["experiment_group"]]) # phase-level for phase in section["trial"]: dur = "duration:Inf" if "duration" in phase: dur = "duration:" + str(phase["duration"]) gc = "" if "gc_aois" in phase: gc = "Gc:" + str(phase["gc_aois"]) + "," section_rows.append("\t" + phase["tag"] + ", " + dur + ", " + gc + " stims:" + str(len(phase["stimuli"]))) # media-information media = utils.get_list_from_dict(section, "images") +\ utils.get_list_from_dict(section, "movies") +\ utils.get_list_from_dict(section, "sounds") for mf in media: mediafiles.append(os.path.join(section["mediafolder"], mf)) medialist = utils.unique(mediafiles) self.text_dialog([["h1", "Sections:"], str(len(experiment_data)), "", ["h1", "Section summaries:"]] + section_rows + ["", ["h1", "Media dependency:"]] + utils.is_file_in_filetree(mediadir, medialist))
def prv_maintain_routing(self):
    """ Performs periodic routing maintenance by pinging """
    logging.info("Maintaining Routing Table")
    neighbors = self.router.routing_table.get_nodes()
    isDead = []
    for node in neighbors:
        try:
            logging.debug("Going to ping %032x", node.int_id)
            WDHTClient(node.ip, node.port).ping()
        except TTransportException as e:
            isDead.append(node)
            logging.exception(e)
    isDead = unique(isDead)
    self.router.remove(isDead)
    missing_regions = self.router.routing_table.get_missing_regions()
    for r, val in missing_regions.iteritems():
        logging.info("I am %032x. Going to call maintain on %032x, with length %s",
                     self.node.int_id, val, len(NodeID.to_id(val)))
        self.maintain(NodeID.to_id(val), self.node)
    logging.info("Done Maintaining Routing Table")
    self.router.routing_table.debug()
def fullrights(fp, rights, r=True): ret = copy.deepcopy(rights) implied = {} for m in fp.server.modules: for ir in m.implicitrights: if ir not in implied: implied[ir] = [] implied[ir] += m.implicitrights[ir] for right in rights: if splitchannel(right): gright = '%,' + splitchannel(right)[1] if gright in implied: for imp in implied[gright]: if ('-' + imp.replace('%', splitchannel(right)[0]).strip('-')) not in ret: ret.append(imp.replace('%', splitchannel(right)[0])) else: if right in implied: for implication in implied[right]: if splitchannel(implication) and fp.type == '': if not fp.external(): for channel in fp.server.channels: if 'names' in channel: if fp.sp.sendernick in channel['names']: if ('-' + (channel['channel'] + ',' + splitchannel(implication)[1]).strip('-')) not in ret: ret.append(channel['channel'] + ',' + splitchannel(implication)[1]) else: if ('-' + implication.strip('-')) not in ret: ret.append(implication) if r: oldrights = rights for i in range(10): if oldrights != ret: oldrights = copy.deepcopy(ret) ret = fullrights(fp, ret, False) return utils.unique(ret)
def decide(self, process):
    handler = lambda ev: filter(bool, ensure_iter(self.handle_event(ev, process)))
    # list of lists of decisions
    decisions = map(handler, process.unseen_events())
    return unique(flatten(decisions))
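# The helpers used above are assumed, not shown in the source. A minimal sketch
# of what they might look like (hypothetical implementations): ensure_iter
# wraps a single decision in a list, and flatten concatenates the per-event
# lists before unique() removes duplicates.
def ensure_iter(value):
    """Return value unchanged if it is already a list/tuple, else wrap it."""
    if value is None:
        return []
    if isinstance(value, (list, tuple)):
        return value
    return [value]

def flatten(list_of_lists):
    """Concatenate one level of nesting."""
    return [item for sub in list_of_lists for item in sub]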
def load_ndarray(fpath, celltype=None): print(" - reading", fpath) with open(fpath, "rb") as f: reader = csv.reader(f) line_stream = skip_comment_cells(strip_rows(reader)) header = line_stream.next() str_table = [] for line in line_stream: if any(value == '' for value in line): raise Exception("empty cell found in %s" % fpath) str_table.append(line) ndim = len(header) # handle last dimension header (horizontal values) last_d_header = str_table.pop(0) # auto-detect type of values for the last d and convert them last_d_pvalues = convert_1darray(last_d_header) unique_last_d, dupe_last_d = unique_duplicate(last_d_pvalues) if dupe_last_d: print(("Duplicate column header value(s) (for '%s') in '%s': %s" % (header[-1], fpath, ", ".join(str(v) for v in dupe_last_d)))) raise Exception("bad data in '%s': found %d " "duplicate column header value(s)" % (fpath, len(dupe_last_d))) # handle other dimensions header # strip the ndim-1 first columns headers = [[line.pop(0) for line in str_table] for _ in range(ndim - 1)] headers = [convert_1darray(pvalues_str) for pvalues_str in headers] if ndim > 1: # having duplicate values is normal when there are more than 2 # dimensions but we need to test whether there are duplicates of # combinations. dupe_combos = list(duplicates(zip(*headers))) if dupe_combos: print(("Duplicate row header value(s) in '%s':" % fpath)) print((PrettyTable(dupe_combos))) raise Exception("bad alignment data in '%s': found %d " "duplicate row header value(s)" % (fpath, len(dupe_combos))) possible_values = [np.array(list(unique(pvalues))) for pvalues in headers] possible_values.append(np.array(unique_last_d)) shape = tuple(len(values) for values in possible_values) num_possible_values = prod(shape) # transform the 2d table into a 1d list str_table = list(chain.from_iterable(str_table)) if len(str_table) != num_possible_values: raise Exception("incoherent data in '%s': %d data cells " "found while it should be %d based on the number " "of possible values in headers (%s)" % (fpath, len(str_table), num_possible_values, ' * '.join(str(len(values)) for values in possible_values))) #TODO: compare time with numpy built-in conversion: # if dtype is None, numpy tries to detect the best type itself # which it does a good job of if the values are already numeric values # if dtype is provided, numpy does a good job to convert from string # values. if celltype is None: celltype = detect_column_type(str_table) data = convert_1darray(str_table, celltype) array = np.array(data, dtype=celltype) return LabeledArray(array.reshape(shape), header, possible_values)
def get_nodes(self): return unique(self.routing_table.get_nodes() + self.neighbor_set.get_neighbors())
def candidates(self): return unique([self.node] + self.get_nodes())
def get_nodes(self): return unique(self.table.values())
def __init__(self, frange): # if the user provides anything but a string, short-circuit the build if not isinstance(frange, basestring): # if it's apparently a FrameSet already, short-circuit the build if set(dir(frange)).issuperset(self.__slots__): for attr in self.__slots__: setattr(self, attr, getattr(frange, attr)) return # if it's inherently disordered, sort and build elif isinstance(frange, Set): self._items = frozenset(map(int, frange)) self._order = tuple(sorted(self._items)) self._frange = FrameSet.framesToFrameRange( self._order, sort=False, compress=False) return # if it's ordered, find unique and build elif isinstance(frange, Sequence): items = set() order = unique(items, map(int, frange)) self._order = tuple(order) self._items = frozenset(items) self._frange = FrameSet.framesToFrameRange( self._order, sort=False, compress=False) return # in all other cases, cast to a string else: try: frange = str(frange) except Exception as err: msg = 'Could not parse "{0}": cast to string raised: {1}' raise ParseException(msg.format(frange, err)) # we're willing to trim padding characters from consideration # this translation is orders of magnitude faster than prior method self._frange = str(frange).translate(None, ''.join(PAD_MAP.keys())) # because we're acting like a set, we need to support the empty set if not self._frange: self._items = frozenset() self._order = tuple() return # build the mutable stores, then cast to immutable for storage items = set() order = [] for part in self._frange.split(","): # this is to deal with leading / trailing commas if not part: continue # parse the partial range start, end, modifier, chunk = FrameSet._parse_frange_part(part) # handle batched frames (1-100x5) if modifier == 'x': frames = xfrange(start, end, chunk) frames = [f for f in frames if f not in items] order.extend(frames) items.update(frames) # handle staggered frames (1-100:5) elif modifier == ':': for stagger in xrange(chunk, 0, -1): frames = xfrange(start, end, stagger) frames = [f for f in frames if f not in items] order.extend(frames) items.update(frames) # handle filled frames (1-100y5) elif modifier == 'y': not_good = frozenset(xfrange(start, end, chunk)) frames = xfrange(start, end, 1) frames = (f for f in frames if f not in not_good) frames = [f for f in frames if f not in items] order.extend(frames) items.update(frames) # handle full ranges and single frames else: frames = xfrange(start, end, 1 if start < end else -1) frames = [f for f in frames if f not in items] order.extend(frames) items.update(frames) # lock the results into immutable internals # this allows for hashing and fast equality checking self._items = frozenset(items) self._order = tuple(order)
def get_optimal_kegs(args): ''' Gets kegs from bevmo.com finds the kegs with the optimal gallons of alcohol per USD ''' num_kegs = args['top'] beer_limit = args['limit'] num_attempts = args['attempts'] max_price = args['price'] desc_filter = args['filter'] desc_unfilter = args['unfilter'] ''' The first url to crawl and its base url ''' seed_url = 'http://www.bevmo.com/Shop/ProductList.aspx/\ Beer/Kegs/_/N-15Z1z141vn?DNID=Beer' base_url = '{url.scheme}://{url.netloc}'.format(url=urlparse(seed_url)) ''' Get initial unique page links from the seed url append base_url to them ''' ''' For info on XPaths, see: http://www.w3schools.com/xpath/xpath_syntax.asp ''' init_page_links = [] init_page_links[:] = unique(get_html(seed_url).xpath('//div[@class="Product\ ListPaging"]/a/@href')) if not init_page_links: print('Failed to retrieve the initial keg page links!') return None ''' Lists for holding links to pages of beer kegs ''' page_links = [seed_url] + map(lambda x: base_url + x, init_page_links) new_page_links = [] ''' Lists for holding links to individual beer kegs ''' beer_links = [] new_beer_links = [] ''' To keep track of already crawled beer kegs ''' crawled_beers = set() ''' List for matching --filter and --unfilter keyword arguments to keg descriptions ''' matched = [] ''' List to hold top beer kegs, the size of optimal_kegs is limited by the num_kegs argument ''' optimal_kegs = [] keg = None while len(page_links) > 0 and len(crawled_beers) < beer_limit: ''' Links are removed as they are crawled ''' page_link = page_links.pop(0) ''' Beer keg links ''' new_beer_links[:] = unique(get_html(page_link).xpath('//a[@class="Prod\ uctListItemLink"]\ /@href')) beer_links += [base_url + x for x in new_beer_links] ''' Crawl the beer keg links get the gallons of alcohol/USD ratio ''' for link in beer_links: ''' Break if the number of crawled beers exceeds the limit ''' if len(crawled_beers) >= beer_limit: break ''' Cache the BevMo beer id's to prevent duplicates ''' beer_id = link.split('/')[-1] if beer_id not in crawled_beers: ''' Create BeerKeg object ''' keg = BeerKeg(link, num_attempts, verbose=True) ''' Call keg.parse() then filter kegs by their descriptions Calling keg.parse() produces fields keg.desc, keg.price, etc keg.parse() will only parse once per keg object ''' ''' Check if price is within range if one was given ''' if max_price: keg.parse() if keg.price > max_price: ''' Move onto the next keg and ignore this one ''' continue ''' args['filter'] has words that must be in the description ''' ''' desc_filter has words that must be in the description ''' if desc_filter: keg.parse() matched = [word in keg.desc for word in desc_filter] ''' All keywords must be present for a match ''' if not all(matched): ''' Move onto the next keg and ignore this one ''' continue ''' desc_unfilter has words that can't be in the description ''' if desc_unfilter: keg.parse() matched = [word in keg.desc for word in desc_unfilter] ''' Any keyword must be present to nullify a match ''' if any(matched): ''' Move onto the next keg and ignore this one ''' continue ''' Add current beer to crawled beers ''' crawled_beers.add(beer_id) ''' Print how many kegs have been crawled ''' print('Keg {}'.format(len(crawled_beers))) ''' Gets the gallons of alcohol per USD for the keg ''' ratio = keg.get_ratio() print('') ''' Maintain a sorted list of the current top 3 kegs using heapq (heap queue algorithm) optimal_kegs holds a tuple containing the ratio and keg associated with it ''' if optimal_kegs: for opt_tuple in optimal_kegs: 
''' If ratio is greater than any keg ratio currently in optimal_kegs, then add it ''' if ratio > opt_tuple[0]: if len(optimal_kegs) >= num_kegs: ''' Adds new item to list removes the smallest to maintain size ''' heapq.heappushpop(optimal_kegs, (ratio, keg)) else: heapq.heappush(optimal_kegs, (ratio, keg)) break else: ''' Will only occur for the very first keg crawled ''' heapq.heappush(optimal_kegs, (ratio, keg)) ''' Typical link: Shop/ProductList.aspx/_/N-15Z1z141vn/No-100?DNID=Beer If No- is evenly divisible by 100, it leads to more pages to add ''' if 'No-' in page_link: if int(page_link.split('No-')[1].split('?')[0]) % 100 == 0: ''' Unique new page links with their base url appended ''' new_page_links[:] = unique(get_html(page_link).xpath('//div[@cl\ ass="Produ\ ctListPagi\ ng"]/a/@hr\ ef')) page_links += [base_url + x for x in new_page_links] ''' Sort the list in descending order by ratio (index 0 in the keg tuple) ''' return sorted(optimal_kegs, key=lambda x: x[0], reverse=True)
def FoodListView(request, username): food_list_serialized = [] user = User.objects.get(username=username) food_list = Food.objects.all().select_related('user', 'restaurant', 'food') if request.GET.get('search', False): query_string = request.GET.get('search', False) food_list = food_list.filter(Q(name__icontains=query_string) | Q(description__icontains=query_string)) if request.GET.get('liked', False): food_list = food_list.filter(foods_liked__in=[user]).order_by('-id') if request.GET.get('friends_like', False): friends = [u['id'] for u in user.following.values('id')] food_list = food_list.filter(foods_liked__in=friends).order_by('-id') if request.GET.get('recommended', False): restaurants = Restaurant.objects.filter(is_recommended=True) food_list = food_list.filter(restaurant__in=restaurants).order_by('-id') if request.GET.get('following', False): restaurants = Restaurant.objects.filter(restaurants_following__in=[user]) food_list = food_list.filter(restaurant__in=restaurants).order_by('-id') if request.GET.get('friends_following', False): friends = user.following.all() restaurants = Restaurant.objects.filter(restaurants_following__in=friends) food_list = food_list.filter(restaurant__in=restaurants).order_by('-id') if request.GET.get('disliked', False): food_list = food_list.filter(foods_disliked__in=[user]).order_by('-id') if request.GET.get('explore', False): food_list = food_list.exclude(foods_liked__in=[user]).exclude(foods_disliked__in=[user]).order_by('id') if request.GET.get('dietary_ids', False): dietary_ids = request.GET.get('dietary_ids', False).split(',') food_list = food_list.filter(dietary__in=dietary_ids) if request.GET.get('cuisine_ids', False): cuisine_ids = request.GET.get('cuisine_ids', False).split(',') food_list = food_list.filter(cuisine__in=cuisine_ids) # filter by range if request.GET.get('price_max', False): price_max = request.GET.get('price_max', False) food_list = food_list.filter(price__lte=int(price_max)) if request.GET.get('price_min', False): price_min = request.GET.get('price_min', False) food_list = food_list.filter(price__gte=int(price_min)) distance_max = request.GET.get('distance_max', False) if distance_max: distance_max = float(distance_max) distance_min = request.GET.get('distance_min', False) if distance_min: distance_min = float(distance_min) #sorting by non-derived field sort = request.GET.get('sort', False) if sort == 'price': food_list = food_list.order_by('price') food_list = unique(food_list) for food in food_list: food_obj = {} food_obj['id'] = food.id food_obj['name'] = food.name # food_obj['description'] = food.description food_obj['price'] = '${0:0.2f}'.format(food.price) food_obj['dist'] = haversine(float(user.location_x), float(user.location_y), float(food.restaurant.location_x), float(food.restaurant.location_y)) food_obj['distance'] = '{0:0.2f}km'.format(food_obj['dist']) # if a distance filter has been set, we only add qualifying restaurants if distance_max: if food_obj['dist'] > distance_max: continue if distance_min: if food_obj['dist'] < distance_min: continue food_obj['photo'] = food.photo food_obj['restaurant'] = food.restaurant.name food_obj['restaurant_id'] = food.restaurant.id food_obj['dietary_ids'] = [{'id':i.id, 'name':i.name} for i in food.dietary.all()] food_obj['cuisine_ids'] = [{'id':i.id, 'name':i.name} for i in food.cuisine.all()] food_obj['is_liked'] = food in user.foods_liked.all() food_obj['num_likes'] = User.objects.filter(foods_liked__in=[food]).count() food_list_serialized.append(food_obj) # sorting by derived 
field if sort == 'likes': food_list_serialized = sorted(food_list_serialized, key=lambda x: x['num_likes'], reverse=True) elif sort == 'location': food_list_serialized = sorted(food_list_serialized, key=lambda x: x['dist']) return HttpResponse(json.dumps(food_list_serialized), content_type="application/json")
def RestaurantsListView(request, username): user = User.objects.get(username=username) user_restaurants_ids = [r.id for r in user.restaurants_following.all()] restaurants = Restaurant.objects.all().select_related('user', 'restaurant', 'food') restaurants_list = [] if request.GET.get('search', False): query_string = request.GET.get('search', False) restaurants = restaurants.filter(Q(name__icontains=query_string) | Q(description__icontains=query_string) | Q(location_name__icontains=query_string)) if request.GET.get('following', False): restaurants = restaurants.filter(restaurants_following__in=[user]) if request.GET.get('friends_following', False): friends = [u['id'] for u in user.following.values('id')] restaurants = restaurants.filter(restaurants_following__in=friends) if request.GET.get('recommended', False): restaurants = restaurants.filter(is_recommended=True) if request.GET.get('me_like', False): resturant_ids = user.foods_liked.values('restaurant__id') seen = set() unique_rids = [r['restaurant__id'] for r in resturant_ids if r['restaurant__id'] not in seen and not seen.add(r['restaurant__id'])] restaurants = restaurants.filter(id__in=unique_rids) if request.GET.get('friends_like', False): resturant_ids = user.following.values('foods_liked__restaurant__id') seen = set() unique_rids = [r['foods_liked__restaurant__id'] for r in resturant_ids if r['foods_liked__restaurant__id'] not in seen and not seen.add(r['foods_liked__restaurant__id'])] restaurants = restaurants.filter(id__in=unique_rids) if request.GET.get('me_review', False): resturant_ids = Review.objects.filter(user=user).values('restaurant__id') seen = set() unique_rids = [r['restaurant__id'] for r in resturant_ids if r['restaurant__id'] not in seen and not seen.add(r['restaurant__id'])] restaurants = restaurants.filter(id__in=unique_rids) if request.GET.get('recommended_people_review', False): users = User.objects.filter(is_recommended=True) resturant_ids = Review.objects.filter(user__in=users).values('restaurant__id') seen = set() unique_rids = [r['restaurant__id'] for r in resturant_ids if r['restaurant__id'] not in seen and not seen.add(r['restaurant__id'])] restaurants = restaurants.filter(id__in=unique_rids) if request.GET.get('friends_review', False): friends = user.following.all() resturant_ids = Review.objects.filter(user__in=friends).values('restaurant__id') seen = set() unique_rids = [r['restaurant__id'] for r in resturant_ids if r['restaurant__id'] not in seen and not seen.add(r['restaurant__id'])] restaurants = restaurants.filter(id__in=unique_rids) if request.GET.get('amenity_ids', False): amenity_ids = request.GET.get('amenity_ids', False).split(',') restaurants = restaurants.filter(amenities__in=amenity_ids) if request.GET.get('dietary_ids', False): dietary_ids = request.GET.get('dietary_ids', False).split(',') food_ids = Food.objects.filter(dietary__in=dietary_ids) restaurants = restaurants.filter(food__in=food_ids) if request.GET.get('cuisine_ids', False): cuisine_ids = request.GET.get('cuisine_ids', False).split(',') food_ids = Food.objects.filter(cuisine__in=cuisine_ids) restaurants = restaurants.filter(food__in=food_ids) # filter by range if request.GET.get('price_max', False): price_max = request.GET.get('price_max', False) restaurants = restaurants.filter(price_high__lte=int(price_max)) if request.GET.get('price_min', False): price_min = request.GET.get('price_min', False) restaurants = restaurants.filter(price_low__gte=int(price_min)) distance_max = request.GET.get('distance_max', False) if distance_max: 
distance_max = float(distance_max) distance_min = request.GET.get('distance_min', False) if distance_min: distance_min = float(distance_min) #sorting by non-derived field sort = request.GET.get('sort', False) if sort == 'price': restaurants = restaurants.extra(select={'price_range': 'price_high + price_low'}).extra(order_by=['price_range']) # get distinct restaurants restaurants = unique(restaurants) for restaurant in restaurants: restaurant_obj = {} restaurant_obj['name'] = restaurant.name restaurant_obj['id'] = restaurant.id restaurant_obj['location_name'] = restaurant.location_name restaurant_obj['location'] = {'x':restaurant.location_x, 'y':restaurant.location_y} restaurant_obj['dist'] = haversine(float(user.location_x), float(user.location_y), float(restaurant.location_x), float(restaurant.location_y)) restaurant_obj['distance'] = '{0:0.2f}km'.format(restaurant_obj['dist']) # if a distance filter has been set, we only add qualifying restaurants if distance_max: if restaurant_obj['dist'] > distance_max: continue if distance_min: if restaurant_obj['dist'] < distance_min: continue restaurant_obj['photo'] = restaurant.photo restaurant_obj['price_low'] = '${0:0.0f}'.format(restaurant.price_low) restaurant_obj['price_high'] = '${0:0.0f}'.format(restaurant.price_high) # restaurant_obj['amenities'] = [{'id': res.id, 'image': res.image} for res in restaurant.amenities.all()] # get the people following this restaurant restaurant_obj['followed_by'] = [{'user_id':person.id, 'username': person.username, 'photo': person.photo} for person in User.objects.filter(restaurants_following__in=[restaurant])[:7]] restaurant_obj['following_count'] = User.objects.filter(restaurants_following__in=[restaurant]).count() restaurant_obj['is_following'] = (restaurant.id in user_restaurants_ids) restaurant_obj['is_recommended'] = restaurant.is_recommended # ratings reviews = Review.objects.filter(restaurant__in=[restaurant]) if reviews.count(): rating = 0 for review in reviews: rating = rating + review.rating rating = rating / reviews.count() else: rating = 0 restaurant_obj['rating'] = rating restaurant_obj['reviews_count'] = reviews.count() restaurants_list.append(restaurant_obj) # sorting by derived field if sort == 'followers': restaurants_list = sorted(restaurants_list, key=lambda x: x['following_count'], reverse=True) elif sort == 'location': restaurants_list = sorted(restaurants_list, key=lambda x: x['dist']) elif sort == 'ratings': restaurants_list = sorted(restaurants_list, key=lambda x: x['rating']) return HttpResponse(json.dumps(restaurants_list), content_type="application/json")
def saveUser(self, form):
    db = self.__db
    cursor = self.__cursor
    hostname = self.__hostname

    # print "Content-type:text/html"   # TEMPORARY, REMOVE AFTER DEBUGGING TO HAVE SCRIPT REDIRECT PROPERLY!!!!!!
    # print                            # DITTO
    # print `form`

    uHandler = UserHandler(db, cursor)
    lHandler = LabHandler(db, cursor)
    pHandler = ProjectDatabaseHandler(db, cursor)

    ucMapper = UserCategoryMapper(db, cursor)
    category_ID_Name_Map = ucMapper.mapCategoryIDToName()

    newProps = {}

    # Get form values
    userID = int(form.getvalue("userID"))
    newUser = uHandler.getUserByID(userID)

    labID = int(form.getvalue("labs"))
    tmpLab = lHandler.findLabByID(labID)

    # rest of user properties
    username = form.getvalue("username")
    firstName = form.getvalue("firstName")
    lastName = form.getvalue("lastName")
    description = firstName + " " + lastName
    email = form.getvalue("email")
    category = category_ID_Name_Map[int(form.getvalue("system_access_level"))]

    newProps["labID"] = labID
    newProps["username"] = username
    newProps["firstname"] = firstName
    newProps["lastname"] = lastName
    newProps["description"] = description
    newProps["email"] = email
    newProps["category"] = category

    try:
        # Now do an update on database level AND on class level:
        uHandler.updateUserProperties(userID, newProps)     # database update

        # Interface level
        newUser.setUsername(username)
        newUser.setFirstName(firstName)
        newUser.setLastName(lastName)
        newUser.setDescription(description)
        newUser.setEmail(email)
        newUser.setLab(tmpLab)
        newUser.setCategory(category)

        # update list of user's projects
        if form.has_key("userProjectsReadonly"):
            # list of IDs
            readonlyProjects = utils.unique(form.getlist("userProjectsReadonly"))
            pHandler.updateUserProjects(userID, readonlyProjects, "Reader")
        else:
            # safe to assume should delete projects?
            pHandler.deleteMemberProjects(userID, "Reader")

        if form.has_key("userProjectsWrite"):
            writeProjects = utils.unique(form.getlist("userProjectsWrite"))
            pHandler.updateUserProjects(userID, writeProjects, "Writer")
        else:
            # safe to assume should delete projects?
            pHandler.deleteMemberProjects(userID, "Writer")

        # think about this
        # newUser.setReadProjects(readProjects)
        # newUser.setWriteProjects(writeProjects)

        # return to detailed view
        self.printUserInfo("view", newUser)
        # utils.redirect(hostname + "User.php?View=3&fd=" + filename)

    except DuplicateUsernameException:
        # Return to the view with input values and an error message.
        # Need to construct a dummy User instance to save form values for error output on the next page
        # (otherwise they're lost as soon as Submit is pressed and the view is exited).
        newLab = lHandler.findLabByID(labID)
        newUser = User(userID, username, firstName, lastName, description, newLab, category, email, "")
        self.printUserInfo("edit", newUser, "Dup_un")
def addUser(self, form):
    db = self.__db
    cursor = self.__cursor
    hostname = self.__hostname

    mail_server = self.__mail_server            # August 19, 2011
    mail_programmer = self.__mail_programmer    # July 30, 2010
    mail_biologist = self.__mail_biologist
    mail_admin = self.__mail_admin

    # print "Content-type:text/html"   # TEMPORARY, REMOVE AFTER DEBUGGING TO HAVE SCRIPT REDIRECT PROPERLY!!!!!!
    # print                            # DITTO
    # print `form`

    uHandler = UserHandler(db, cursor)
    lHandler = LabHandler(db, cursor)
    pHandler = ProjectDatabaseHandler(db, cursor)

    ucMapper = UserCategoryMapper(db, cursor)
    category_Name_ID_Map = ucMapper.mapCategoryNameToID()

    # Get form values
    labID = int(form.getvalue("labs"))

    username = form.getvalue("username")
    firstName = form.getvalue("firstName")
    lastName = form.getvalue("lastName")
    description = firstName + " " + lastName
    to_email = form.getvalue("email")
    from_email = mail_admin

    # Change July 30, 2010 - random password generator
    # passwd = form.getvalue("password")
    chars = string.letters + string.digits
    passwd = ""
    for i in range(10):
        passwd += choice(chars)

    # System access level: Lab default or override?
    # if form.getvalue("privChoiceRadio") == 'override':
    accessLevel = category_Name_ID_Map[form.getvalue("system_access_level")]
    # else:
    #     accessLevel = lHandler.findDefaultAccessLevel(labID)

    newProps = {}

    try:
        # Insert User information
        userID = uHandler.insertUser(username, firstName, lastName, description, accessLevel, to_email, passwd, labID)
        # newUser = uHandler.getUserByID(userID)

        tmpLab = lHandler.findLabByID(labID)
        # print tmpLab.getName()

        # Insert Project info
        # Sept. 11/07: Differentiate between user categories Reader and Writer - different field names
        if form.has_key("userProjectsReadonly"):
            # list of IDs
            readonlyProjects = utils.unique(form.getlist("userProjectsReadonly"))
            # print `readonlyProjects`
            pHandler.insertMemberProjects(userID, readonlyProjects, "Reader")
        elif form.has_key("userProjectsReadonlyWrite"):
            # list of IDs
            readonlyProjects = utils.unique(form.getlist("userProjectsReadonlyWrite"))
            # print `readonlyProjects`
            pHandler.insertMemberProjects(userID, readonlyProjects, "Reader")

        # Write projects exist only for Writers
        if form.has_key("userProjectsWrite"):
            writeProjects = utils.unique(form.getlist("userProjectsWrite"))
            pHandler.insertMemberProjects(userID, writeProjects, "Writer")

        # don't assign projects to a User instance - will retrieve them from db in output function
        newUser = User(userID, username, firstName, lastName, description, tmpLab,
                       form.getvalue("system_access_level"), to_email, passwd, [], [])

        # E-mail the new user their login details
        email_subject = "OpenFreezer User Account"

        msg = email.MIMEMultipart.MIMEMultipart("alternative")
        msg["Subject"] = email_subject
        msg["To"] = to_email

        msgText = "Hi " + firstName + ",<BR><BR>An OpenFreezer account has been created for you. Your access level is " + form.getvalue("system_access_level") + ", so you can "

        if form.getvalue("system_access_level") == "Reader":
            msgText += "search for clones. If you wish to add/modify reagents or create projects, please contact the administrator to upgrade your access level.<BR>"
        elif form.getvalue("system_access_level") == "Writer":
            msgText += "search, add, and modify reagents. If you wish to create projects, please contact the administrator to upgrade your access level.<BR>"
        elif form.getvalue("system_access_level") == "Creator":
            msgText += "search for clones, add and modify reagents, as well as create your own projects.<BR>"

        #####################################################
        # CHANGE TEXT AS NEEDED
        #####################################################
        msgText += "<BR>The URL to access the system is <a href='" + hostname + "'>" + hostname + "</a>. Your username is <b>" + username + "</b>, and your temporary password is <b>" + passwd + "</b>. Please <u>change the temporary password as soon as you log into the website</u> - you can do it through the 'Change your password' link under the 'User Management' menu section.<BR><BR>Please refer to http://openfreezer.org for additional support.<BR><BR>Sincerely,<BR>OpenFreezer support team.<BR><BR><span style='font-family:Courier; font-size:10pt;'><HR>This is an automatically generated e-mail message. Please do not reply to this e-mail. All questions should be directed to your local administrator.</span>"

        msgText = email.MIMEText.MIMEText(msgText, "html")
        msg.attach(msgText)

        server = smtplib.SMTP(mail_server)
        server.set_debuglevel(1)
        server.sendmail(from_email, [to_email], msg.as_string())
        server.quit()

        self.printUserInfo("view", newUser)

    except DeletedUserException:
        # Without asking too many questions, reactivate the deleted user and overwrite
        # his/her attributes with the form input values
        userID = uHandler.findUserIDByUsername(username)

        newProps["firstname"] = firstName
        newProps["lastname"] = lastName
        newProps["description"] = description
        newProps["email"] = to_email
        newProps["status"] = "ACTIVE"
        newProps["password"] = passwd

        # Insert new database values and create new object
        uHandler.updateUserProperties(userID, newProps)     # database update
        newUser = uHandler.getUserByID(userID)

        # Insert Project info
        readProjects = []
        writeProjects = []

        if form.has_key("userProjectsReadonly"):
            # list of IDs
            readonlyProjects = form.getlist("userProjectsReadonly")
            for r in readonlyProjects:
                pHandler.addProjectMember(r, userID, "Reader")
                # tmpReadProject = pHandler.findPacket(r)
                # readProjects.append(tmpReadProject)
                # newUser.addProject(tmpReadProject, 'read')

        if form.has_key("userProjectsWrite"):
            writeProjects = form.getlist("userProjectsWrite")
            for w in writeProjects:
                pHandler.addProjectMember(w, userID, "Writer")
                # tmpWriteProject = pHandler.findPacket(w)
                # writeProjects.append(tmpWriteProject)
                # newUser.addProject(tmpWriteProject, 'write')

        # newUser.setReadProjects(readProjects)
        # newUser.setWriteProjects(writeProjects)

        self.printUserInfo("view", newUser)
        # utils.redirect(hostname + "User.php?View=3&fd=" + filename)

    except DuplicateUsernameException:
        # Return to the view with input values and an error message.
        # Need to construct a dummy User instance to save form values for error output on the next page
        # (otherwise they're lost as soon as Submit is pressed and the creation view is exited).
        newLab = lHandler.findLabByID(labID)
        newUser = User(0, username, firstName, lastName, description, newLab, "", to_email, passwd)
        self.printUserInfo("create", newUser)
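# addUser assembles a multipart/alternative message with the legacy Python 2 email API
# (email.MIMEMultipart / email.MIMEText) and sends it through smtplib. Below is a minimal
# sketch of the same flow using the Python 3 email.mime modules; the function name and the
# mail_server / from_email / to_email / html_body parameters are placeholders for this
# illustration, not values taken from the code above.
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import smtplib

def send_account_email(mail_server, from_email, to_email, subject, html_body):
    # build the multipart container and attach the HTML part
    msg = MIMEMultipart("alternative")
    msg["Subject"] = subject
    msg["To"] = to_email
    msg.attach(MIMEText(html_body, "html"))
    # hand the rendered message to the SMTP server and always close the connection
    server = smtplib.SMTP(mail_server)
    try:
        server.sendmail(from_email, [to_email], msg.as_string())
    finally:
        server.quit()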