Example #1
def beta(cluster, data):
    # data: Data points
    # cluster: cluster centers
    distance = euclidean_distances(data, cluster)

    resultedClassLabel = []
    for i in range(len(data)):
        idx, val = list_min(distance[i])
        resultedClassLabel.append(idx)

    result = 0.0
    for i in unique(resultedClassLabel):
        indexList = multiIndexOf(resultedClassLabel, i)
        elementCount = len(indexList)
        if elementCount <= 1:
            continue

        #_data = data[indexList]
        #_data = list(map(list(data).__getitem__, indexList))
        _data = [data.loc[j] for j in indexList]  # j, to avoid shadowing the outer loop variable i
        _distance = euclidean_distances(_data, _data)**2
        result = result + (sum(sum(_distance)) / (elementCount *
                                                  (elementCount - 1)))

    #ret = "{0:.3f}".format(float(result / (len(unique(resultedClassLabel)))))
    ret = '{:0,.2f}'.format(result / (len(unique(resultedClassLabel))))
    return ret
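Example #1 relies on three helpers that are not shown. A minimal sketch of what they presumably do, inferred from how they are called above (the bodies are assumptions, not the original implementations):

def list_min(values):
    # assumed: return (index, value) of the smallest element
    idx = min(range(len(values)), key=values.__getitem__)
    return idx, values[idx]

def multiIndexOf(values, target):
    # assumed: all positions at which target occurs
    return [j for j, v in enumerate(values) if v == target]

def unique(values):
    # assumed: order-preserving de-duplication
    seen = set()
    return [v for v in values if not (v in seen or seen.add(v))]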
Example #2
def read_data(x, y, split):
    with h5py.File(split, "r") as f:
        train_idx = utils.decode(f["train"][...])
        val_idx = utils.decode(f["val"][...])
        test_idx = utils.decode(f["test"][...])
        all_idx = np.concatenate([train_idx, val_idx, test_idx], axis=0)

    with h5py.File(x, "r") as f:
        idx, mat = utils.unique(utils.decode(f["protein_id"][...]), f["mat"])
        assert np.all(np.in1d(all_idx, idx))
        idx_mapper = utils.get_idx_mapper(idx)
        x_train = mat[idx_mapper(train_idx)]
        x_val = mat[idx_mapper(val_idx)]
        x_test = mat[idx_mapper(test_idx)]

    with h5py.File(y, "r") as f:
        idx, mat = utils.unique(utils.decode(f["protein_id"][...]), f["mat"])
        assert np.all(np.in1d(all_idx, idx))
        idx_mapper = utils.get_idx_mapper(idx)
        y_train = mat[idx_mapper(train_idx)]
        y_val = mat[idx_mapper(val_idx)]
        y_test = mat[idx_mapper(test_idx)]

    return (utils.DataDict([("x", x_train), ("y", y_train),
                            ("protein_id", train_idx)]),
            utils.DataDict([("x", x_val), ("y", y_val),
                            ("protein_id", val_idx)]),
            utils.DataDict([("x", x_test), ("y", y_test),
                            ("protein_id", test_idx)]))
Example #3
    def __init__(self, split):
        self.train = split == 'train'
        self.loadinfo(default.groundtruth_file)

        fn = default.split_file
        with open(fn, 'r') as f:
            data = json.load(f)
            print 'loaded', fn
        self.term_list = data['term_list']
        self.num_labels = len(self.term_list)

        prefix = split
        self.smp_idxs, self.labels, self.uncertain_labels = \
            data['%s_lesion_idxs'%prefix], data['%s_relevant_labels'%prefix], \
            data['%s_uncertain_labels'%prefix]
        if self.train:
            self.irrelevant_labels = data['train_irrelevant_labels']
        self.num_smp = len(self.smp_idxs)

        if not hasattr(default, 'ontology'):
            default.ontology = load_ontology_from_xlsfile(default.ontology_file)
        self.ontology = default.ontology
        self.gen_parents_list()
        self.gen_children_list()
        self.gen_exclusive_list()

        if config.TRAIN.TEXT_MINED_LABEL == 'RUI' and self.train:
            self.labels = [r+u+i for r,u,i in zip(self.labels, self.uncertain_labels, self.irrelevant_labels)]
        elif config.TRAIN.TEXT_MINED_LABEL == 'RU' and self.train:
            self.labels = [r+u for r,u in zip(self.labels, self.uncertain_labels)]
        self.labels = [unique(l) for l in self.labels]
        self.uncertain_labels = [unique(u) for u in self.uncertain_labels]

        terms_all = [d['term'] for d in self.ontology]
        self.term_class = [self.ontology[terms_all.index(t)]['class'] for t in self.term_list]

        print '>>>', len(self.smp_idxs), prefix, 'samples,',
        keep = [i for i in range(len(self.smp_idxs))
                if (not self.noisy[self.smp_idxs[i]])
                   and len(self.labels[i]) > 0]
        self.smp_idxs = [self.smp_idxs[i] for i in keep]
        self.num_smp = len(self.smp_idxs)
        print self.num_smp, 'after removing noisy and empty ones:'

        print self.num_labels, 'labels,',
        self.labels = [self.labels[i] for i in keep]
        self.uncertain_labels = [self.uncertain_labels[i] for i in keep]
        print '%d relevant cases,' % np.hstack(self.labels).shape[0],
        print '%d uncertain cases.' % np.hstack(self.uncertain_labels).shape[0]

        if default.generate_features_all:
            self.smp_idxs = range(len(self.filenames))
            self.labels = [[0] for _ in self.smp_idxs]
            self.uncertain_labels = [[0] for _ in self.smp_idxs]
            print 'Fake evaluation, generating features for all 32735 lesions'

        all_labels = [lb for lbs in self.labels for lb in lbs]
        self.cls_sz = np.array([all_labels.count(cls) for cls in range(self.num_labels)], dtype=np.float32)
        self.gen_class_weights()
        print
Example #4
    def __init__(self, table, header):
        self.branches = {}
        self.table = table
        self.node_type = None
        self.split_index = None
        self.leaf_class = None
        self.header = header

        classes = [x[-1] for x in table]
        c_types = u.unique(classes)

        if len(c_types) == 1:
            self.node_type = LEAF
            self.leaf_class = c_types[0]
            # print("Creating leaf: ")
            # print(self.table)
            # print(self.leaf_class)

        else:
            self.split_index = max_gain(table, header)
            if self.split_index != -1:
                split_vals = u.unique(table, col=self.split_index)
                self.node_type = SPLIT
                branch_tabs = [[y for y in table if y[self.split_index] == x]
                               for x in split_vals]
                for i, bran in enumerate(branch_tabs):
                    self.branches[split_vals[i]] = TreeNode(bran, header)
            else:
                self.node_type = LEAF
                self.leaf_class = u.majority_vote(table)
Example #5
    def update(self, nodes):
        logging.debug("Adding nodes %s", ' '.join(
            ["%032x" % x.int_id for x in nodes]))
        self.cw.extend(nodes)
        self.ccw.extend(nodes)
        self.cw = unique(self.cw)
        self.ccw = unique(self.ccw)
        self.cw.sort(key=self.cw_distance)
        self.ccw.sort(key=self.ccw_distance)
        self.cw = self.cw[:self.size]
        self.ccw = self.ccw[:self.size]
Example #6
def estimate_parameters(series, min_size_series=50, discrete=False):
    """
    
    Apply Clauset et al.'s method to find the best fit value of xmin and Alpha.

    **Parameters**

        series : series of data to be fit.
        
        min_size_series : Minimum possible size of the distribution to which power-law fit will be attempted. Fitting power-law to a very small series would give biased results where power-law may appear to be a good fit even when data is not drawn from power-law distribution. The default value is taken to be 50 as suggested in the paper.

        discrete : Boolean, whether to treat series as discrete or continuous. Default value is False.

    **Returns**

        Tuple of (Estimated xmin, Estimated Alpha value, minimum KS statistics score).

    """

    sorted_series = sorted(series)
    xmin_candidates = []
    x_prev = sorted_series[0]
    xmin_candidates.append(x_prev)
    for x in sorted_series:
        if (x > x_prev):
            x_prev = x
            xmin_candidates.append(x_prev)

    ks_statistics_min = sys.maxint
    xmin_result = 0
    Alpha_result = 2
    for xmin in xmin_candidates[:-1 * (min_size_series - 1)]:
        data = filter(lambda x: x >= xmin, sorted_series)
        estimated_Alpha = estimate_scaling_parameter(data, xmin)
        if (discrete):
            Px = [
                zeta(estimated_Alpha, x) / zeta(estimated_Alpha, xmin)
                for x in unique(data)
            ]
        else:
            Px = [
                pow(float(x) / xmin, 1 - estimated_Alpha) for x in unique(data)
            ]
        n = len(Px)
        Sx = [i[1] / n for i in frequency_distribution(data, pdf=False)]
        ks_statistics = max(
            map(lambda counter: abs(Sx[counter] - Px[counter]), range(0, n)))
        if (ks_statistics < ks_statistics_min):
            ks_statistics_min = ks_statistics
            xmin_result = xmin
            Alpha_result = estimated_Alpha

    return (xmin_result, Alpha_result, ks_statistics_min)
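A hedged usage sketch for Example #6: draw from a continuous power law with alpha = 2.5 and xmin = 1 via inverse-CDF sampling, then recover the parameters (numpy is assumed; estimate_parameters is the function above):

import numpy as np

u = np.random.rand(10000)
series = (1 - u) ** (-1.0 / (2.5 - 1))  # continuous power law, xmin = 1
xmin, alpha, ks = estimate_parameters(series)
print(xmin, alpha, ks)  # alpha should come out near 2.5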
Example #7
    def fit(self):
        best = 0
        last_save = 0
        log = open(self.out + '.log', 'w')
        for epoch in range(1000):
            print('\n----------\nEPOCH %d\n----------' % epoch)
            self.policy_gradient()
            seqs = self.agent.sample(self.n_samples)
            ix = utils.unique(seqs)
            smiles = [self.agent.voc.decode(s) for s in seqs[ix]]
            scores = self.env(smiles, is_smiles=True)

            desire = (scores.DESIRE).sum() / self.n_samples
            score = scores[self.env.keys].values.mean()
            valid = scores.VALID.mean()

            if best <= score:
                torch.save(self.agent.state_dict(), self.out + '.pkg')
                best = score
                last_save = epoch

            print("Epoch: %d average: %.4f valid: %.4f unique: %.4f" %
                  (epoch, score, valid, desire),
                  file=log)
            for i, smile in enumerate(smiles):
                score = "\t".join(['%0.3f' % s for s in scores.values[i]])
                print('%s\t%s' % (score, smile), file=log)
            if epoch - last_save > 100:
                break
        for param_group in self.agent.optim.param_groups:
            param_group['lr'] *= (1 - 0.01)
        log.close()
Example #8
    def framesToFrameRange(frames, sort=True, zfill=0, compress=False):
        """
        Converts an iterator of frames into a
        :class:`fileseq.framerange.FrameRange`.

        :type frames: iterable
        :param frames: sequence of frames to process
        :type sort: bool
        :param sort: sort the sequence before processing
        :type zfill: int
        :param zfill: width for zero padding
        :type compress: bool
        :param compress: remove any duplicates before processing
        :rtype: str
        """
        if compress:
            frames = unique(set(), frames)
        frames = list(frames)
        if not frames:
            return ''
        if len(frames) == 1:
            return pad(frames[0], zfill)
        if sort:
            frames.sort()
        return ','.join(FrameSet.framesToFrameRanges(frames, zfill))
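Example #8 (and Example #55 below) calls a two-argument unique(seen, iterable). A sketch of the generator this implies, an assumption inferred from both call sites, where the caller's set is filled in as a side effect:

def unique(seen, iterable):
    # yield items not yet in seen, recording them in the caller's set
    for item in iterable:
        if item not in seen:
            seen.add(item)
            yield item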
Example #9
    def _import_csv(self):
        """
        Function that read the csv in which the target of each image is written.

        :arg
            self (DataHandler): instance of the class

        :return
            df (pandas dataframe): pandas dataframe corresponding to the targets. Each column corresponds to a pathology,
                                   each row corresponds to an image.
        """
        try:
            df = pd.read_csv(self.label_full_path)
        except IOError as e:
            raise e

        df = df.iloc[:, [0, 1]]

        split_data = df['Finding Labels'].str.split('|')
        list1 = split_data.to_list()
        flat_list = [item for sublist in list1 for item in sublist]
        unique_list = unique(flat_list)

        df = pd.concat([df, pd.DataFrame(columns=unique_list)], sort=False)

        for value in unique_list:
            bool_value = df['Finding Labels'].str.contains(value)
            df[value] = bool_value.astype(int)

        df = df.drop(labels=['Finding Labels'], axis=1)

        return df
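The loop in Example #9 one-hot encodes each pathology by hand; pandas' built-in str.get_dummies performs the same split-and-indicator step, so an equivalent sketch of the tail of the function could be:

dummies = df['Finding Labels'].str.get_dummies(sep='|')
df = pd.concat([df.drop(labels=['Finding Labels'], axis=1), dummies], axis=1)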
Example #10
def index():
    # initialize FlaskForm from flask_wtf
    if request.method == "GET":
        # make form
        form = UsernamesForm()
        return render_template('index.html',
                               data=json.dumps(usernames),
                               recs=None,
                               form=form)

    if request.method == "POST":
        # make form
        form = UsernamesForm(request.form)

        # validate form input
        if form.validate_on_submit():
            # if valid, get recommendations
            query = form.username_input.data
            follows = [
                x.strip() for x in query.split(",") if x.strip() in usernames
            ]
            follows = unique(follows)
            query = "+".join(follows)
            return redirect(url_for('results', query=query))

        else:
            # flash error message
            flash('Enter a valid username')

            return redirect(url_for('index'))
Example #11
    def gen_exclusive_list(self):
        if hasattr(default, 'exclusive_list'):
            self.exclusive_list = default.exclusive_list
            return

        self.exclusive_list = []
        all_d_terms = [t['term'] for t in self.ontology]
        for p in range(self.num_labels):
            idx = all_d_terms.index(self.term_list[p])
            self.exclusive_list.append([self.term_list.index(ex) for ex in
                                        self.ontology[idx]['exclusive'] if ex in self.term_list])

        # if labels A and B are exclusive, any child of A and any child of B should also be exclusive
        while True:
            flag = False
            for p in range(self.num_labels):
                cur_ex = self.exclusive_list[p]
                next_ex = cur_ex[:]
                for ex in cur_ex:
                    next_ex += self.all_children_list[ex]
                for parent in self.parent_list[p]:
                    next_ex += self.exclusive_list[parent]
                next_ex = unique(next_ex)
                flag = flag or (set(next_ex) != set(cur_ex))
                self.exclusive_list[p] = next_ex
            if not flag:
                break

        print '%d mutually exclusive pairs extracted' % (len([p1 for p in self.exclusive_list for p1 in p]) / 2)
        default.exclusive_list = self.exclusive_list
Example #12
def get_tags():
    g.user.reload()  # To capture tags that have just been added

    base_tags = [t["title"] for t in Tags.objects.get_base_tags()]

    # using unique here to maintain order by popularity for base tags
    return filter(None, unique(base_tags + g.user.tags))
Example #13
def intersectConvexPolygons(polygon1, polygon2):
    ps = []
    for i in range(0, len(polygon1)):
        ps.append(polygon1[i])
    for i in range(0, len(polygon2)):
        ps.append(polygon2[i])

    for i in range(len(polygon1)):
        seg1 = GeneralizedSegment(
            polygon1[i], polygon1[(i + 1) if i + 1 < len(polygon1) else 0],
            False, False)
        for j in range(0, len(polygon2)):
            seg2 = GeneralizedSegment(
                polygon2[j], polygon2[j + 1 if j + 1 < len(polygon2) else 0],
                False, False)
            uk = segmentIntersection(seg1, seg2)
            if (uk):
                ps.append(uk)

    ans = []
    for i in range(0, len(ps)):
        if (isInConvexPolygon(ps[i], polygon1)
                and isInConvexPolygon(ps[i], polygon2)):

            ans.append(ps[i])

    pans = [ans[0]]
    pans.extend(utils.sort(ans[1:],
                           ByPolarAngleAroundCenterComparator(ans[0])))
    ans = utils.unique(pans)
    return ans
Example #14
def main():
    cmd_args = parse_args()
    idx_list = []
    for path in cmd_args.input:  # renamed from input, to avoid shadowing the builtin
        with h5py.File(path, "r") as f:
            idx = f["protein_id"][...]
            idx_list.append(utils.unique(idx))
    common_idx = functools.reduce(np.intersect1d, idx_list)
    if cmd_args.seed is not None:
        np.random.seed(cmd_args.seed)
    kf = KFold(n_splits=cmd_args.k_fold,
               shuffle=True,
               random_state=cmd_args.seed)
    if not os.path.exists(cmd_args.output_path):
        os.makedirs(cmd_args.output_path)
    current_fold = 0
    for train_idx, test_idx in kf.split(common_idx):
        with h5py.File(
                os.path.join(cmd_args.output_path, "fold%d.h5" % current_fold),
                "w") as f:
            val_size = np.round(len(train_idx) * cmd_args.val_frac).astype(int)
            val_idx = np.random.choice(train_idx, val_size, replace=False)
            train_idx = np.setdiff1d(train_idx, val_idx)
            f.create_dataset("train", data=common_idx[train_idx])
            f.create_dataset("val", data=common_idx[val_idx])
            f.create_dataset("test", data=common_idx[test_idx])
        current_fold += 1
Example #15
def relabeling(actualClass, predictedClass):
    # actualClass      = Reference cluster label vector.
    # predictedClass   = Query cluster label vector.
    # mappedClass      = Query vector after mapping.

    mappedClass = list(predictedClass)  # copy, so the input is not mutated

    minLabel = list_min(predictedClass)
    maxLabel = list_max(predictedClass)

    for i in range(minLabel, maxLabel + 1):
        a = multiIndexOf(predictedClass, i)
        b = [actualClass[j] for j in a]  # reference labels at those positions
        x = unique(b)
        v = i
        maxm = -9
        for t in x:
            y = len(multiIndexOf(b, t))
            if y > maxm:
                v = t
                maxm = y

        # relabel every position of cluster i with its majority reference label
        for j in a:
            mappedClass[j] = v

    return mappedClass
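A quick check of Example #15's mapping (assuming list_min and list_max here return plain scalar extrema, unlike the tuple-returning list_min sketched under Example #1):

mapped = relabeling([0, 0, 1, 1], [1, 1, 0, 0])
print(mapped)  # every predicted 1 co-occurs with reference 0 and vice versa -> [0, 0, 1, 1]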
Example #16
    def policy_gradient(self, crover=None, memory=None, epsilon=None):
        seqs = []
        start = time.time()
        for _ in range(self.replay):
            seq = self.agent.evolve1(self.batch_size,
                                     epsilon=epsilon,
                                     crover=crover,
                                     mutate=self.prior)
            seqs.append(seq)
        t1 = time.time()
        seqs = torch.cat(seqs, dim=0)
        if memory is not None:
            mems = [memory, seqs]
            seqs = torch.cat(mems)
        smiles = np.array([self.agent.voc.decode(s) for s in seqs])
        # smiles = np.array(utils.canonicalize_list(smiles))
        ix = utils.unique(np.array([[s] for s in smiles]))
        smiles = smiles[ix]
        seqs = seqs[torch.LongTensor(ix).to(utils.dev)]

        scores = self.env.calc_reward(smiles, self.scheme)
        if memory is not None:
            scores[:len(memory), 0] = 1
            ix = scores[:, 0].argsort()[-self.batch_size * 4:]
            seqs, scores = seqs[ix, :], scores[ix, :]
        t2 = time.time()
        ds = TensorDataset(seqs, torch.Tensor(scores).to(utils.dev))
        loader = DataLoader(ds, batch_size=self.n_samples, shuffle=True)

        self.agent.PGLoss(loader)
        t3 = time.time()
        print(t1 - start, t2 - t1, t3 - t2)
Example #17
    def policy_gradient(self):
        seqs = []
        for _ in range(self.replay):
            seq = self.agent.sample(self.batch_size)
            seqs.append(seq)
        seqs = torch.cat(seqs, dim=0)
        ix = utils.unique(seqs)
        seqs = seqs[ix]
        smiles = [self.agent.voc.decode(s) for s in seqs]

        scores = self.env.calc_reward(smiles, self.scheme)[:, 0]
        ds = TensorDataset(seqs,
                           torch.Tensor(scores - self.beta).to(utils.dev))
        loader = DataLoader(ds, batch_size=self.n_samples, shuffle=True)

        for seq, score in loader:
            # Calculate gradients and make an update to the network weights
            self.agent.optim.zero_grad()
            prior_likelihood = self.prior.likelihood(seq).sum(dim=1)
            agent_likelihood = self.agent.likelihood(seq).sum(dim=1)
            augmented_likelihood = prior_likelihood + self.epsilon * score
            loss = torch.pow((augmented_likelihood - agent_likelihood), 2)
            # Calculate loss
            loss = loss.mean()

            # Add regularizer that penalizes high likelihood for the entire sequence
            loss_p = -(1 / agent_likelihood).mean()
            loss += 5 * 1e3 * loss_p
            loss.backward()
            self.agent.optim.step()
Example #18
def bulletin_ac(request, boardname):
  def iter_results(results):
    if results:
      for r in results:
        yield '%s\n' % r

  if not request.GET.get('q'):
    return HttpResponse(mimetype='text/plain')    # no query given

  q = request.GET.get('q')
  limit = request.GET.get('limit', 15)
  try:
    limit = int(limit)
  except ValueError:
    return HttpResponseBadRequest()


  if boardname == 'scrapbook':
    target = Like.objects.filter(Q(isHiddenUser=False) & Q(user=request.user))
    acData = map(lambda b: b.bulletin.title, target.filter(bulletin__title__contains=q)[:limit])    # search by title
    acData = acData + map(lambda b: b.bulletin.writer.first_name, target.filter(bulletin__writer__first_name__contains=q)[:limit])  # search by author
  elif boardname == 'my':
    target = Bulletin.bulletIns.filter(Q(isHiddenUser=False) & Q(writer=request.user))
    acData = map(lambda b: b.title, target.filter(title__contains=q)[:limit])    # search by title
  else:
    board = Board.objects.get(name=boardname)
    target = Bulletin.bulletIns.filter(Q(isHiddenUser=False) & Q(board=board))
    acData = map(lambda b: b.title, target.filter(title__contains=q)[:limit])    # search by title
    acData = acData + map(lambda b: b.writer.first_name, target.filter(writer__first_name__contains=q)[:limit])  # search by author
  acData = unique(acData)
  return HttpResponse(iter_results(acData), mimetype='text/plain')
Example #19
def main(filepath):
    emotions = []
    if not os.path.isfile(filepath):
        print("File path {} does not exist. Exiting...".format(filepath))
        sys.exit()
    # make it txt
    with open(filepath) as fp:
        ln_count = 0
        blob = []
        for line in fp:
            ln_count += 1
            if ln_count % 10 == 0:
                emotions.append(emotion.get(blob))
                blob = []
            else:
                blob.append(unicode(line, "utf-8"))
            input = nlp.preprocess(line)
            input = utils.unique(input)
            print(input)
            sel.getImage(' '.join(input))
            image.join(line, ln_count)
    if len(blob) != 0:
        emotions.append(emotion.get(blob))
    mood = emotion.process(emotions)
    print(mood)
    song = music.getSong(mood)
    song = "./music/" + mood + "/" + song
    video.generate(song, "Awesomevideo", "mp4")
Example #20
def main(voc_file='data/Voc',
         restore_model_from='data/Prior.ckpt',
         output_file='data/Prior_10k.smi',
         sample_size=10000):

    voc = Vocabulary(init_from_file=voc_file)
    print("Setting up networks")
    Agent = RNN(voc)

    if torch.cuda.is_available():
        print("Cuda available, loading prior & agent")
        Agent.rnn.load_state_dict(torch.load(restore_model_from))
    else:
        raise RuntimeError('Cuda not available')


    SMILES = []
    for n in tqdm(range(sample_size//100), total=sample_size//100):
        # Sample from Agent
        seqs, agent_likelihood, entropy = Agent.sample(100)
        # Remove duplicates, ie only consider unique seqs
        unique_idxs = unique(seqs)
        seqs = seqs[unique_idxs]
        agent_likelihood = agent_likelihood[unique_idxs]
        entropy = entropy[unique_idxs]
        smiles = seq_to_smiles(seqs, voc)
        SMILES += smiles

    if not os.path.exists(os.path.dirname(output_file)):
        os.makedirs(os.path.dirname(output_file))

    with open(output_file, "wt") as f:
        for smi in SMILES:
            f.write(smi + '\n')

    return
Example #22
def getrights(fp, args):
    user = args.getlinstr('user', fp.accesslevelname)
    if user == '.':
        user = fp.accesslevelname
    try:
        access.raiseifnotformeduser(user)
    except access.AccessLevelError:
        return "Malformed user!"
    extra = []
    if fp.type == 'irc':
        for c in fp.server.channels:
            try:
                extra += fp.channelrights('='.join(user.split('=')[:-2]), c)
            except IndexError:
                pass
    if 'base' not in args.lin or args.getlinstr('rights', ''):
        r = access.fullrights(fp, access.getrights(fp.server, user) + extra)
    else:
        r = access.getrights(fp.server, user)
    if args.getlinstr('rights', ''):
        hasrights = True
        for right in args.getlinstr('rights', '').split(' '):
            if right not in r:
                hasrights = False
        return "Yes" if hasrights else "No"
    return user + ' -- ' + utils.ltos(
        sorted(utils.unique(r), key=lambda x: x.strip('#-')), '; ')
Example #23
def genCFile(vecs, hfile):
    res = """\
#include "{0}"
#include <stddef.h>
#include <math.h>\n\n"""
    res = res.format(hfile)
    generics = []
    particulars = []
    for grp in sortByType(vecs):
        funcs = []
        defs = []
        for g in grp:
            pfs = g.getParticularFuncs()
            if pfs:
                d = [
                    f.getBody(g.getType(), g.getScalar(), g.getSize())
                    for f in pfs
                ]
                defs.append("\n\n".join(d))
            funcs.extend(g.getGenericFuncs())
        particulars.append("\n\n".join(defs))
        generics.append("\n\n".join(
            [f.getBody(grp[0].getScalar()) for f in unique(funcs)]))

    generics = "\n\n".join(generics)
    particulars = "\n\n".join(particulars)
    res += "\n\n".join([generics, particulars])
    return res
Example #24
def merge(acc, x):
    for key, val in x.items():
        if key in acc:
            # conflict, merge children
            val = unique(acc[key] + val)
        acc[key] = val
    return acc
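A usage sketch for Example #24, assuming the order-preserving unique from Example #1 and folding with functools.reduce:

import functools

trees = [{'a': [1, 2]}, {'a': [2, 3], 'b': [4]}]
merged = functools.reduce(merge, trees, {})
print(merged)  # -> {'a': [1, 2, 3], 'b': [4]}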
Example #25
    def __init__(self, table, header, first=True, full_table=None):
        self.branches = {}
        self.table = table
        self.node_type = None
        self.split_index = None
        self.leaf_class = None
        self.header = header

        # to initialize, gather the unique values of every attribute into full_table
        if first:
            full_table = []
            for att_index, _ in enumerate(header[:-1]):
                full_table.append(
                    utils.unique([row[att_index] for row in table]))

        # get list of all class values
        classes = [x[-1] for x in table]
        c_types = utils.unique(classes)

        # if only one class, add leaf node
        if len(c_types) == 1:
            ut = utils.unique_table(self.table)
            self.node_type = LEAF
            self.leaf_class = c_types[0]

        # otherwise, use entropy to determine attribute index to split
        else:
            self.split_index = max_gain(table, header)
            # max_gain returns -1 if there is only one attr value in the current table

            if self.split_index != -1:
                # split on index with greatest information gain, then iterate over each attr value
                split_vals = utils.unique(table, col=self.split_index)
                self.node_type = SPLIT
                branch_tabs = [[y for y in table if y[self.split_index] == x]
                               for x in split_vals]
                for i, branch in enumerate(branch_tabs):
                    self.branches[split_vals[i]] = TreeNode(
                        branch, header, first=False, full_table=full_table)
            else:
                # if only one attribute value left, create leaf node
                self.node_type = LEAF
                self.leaf_class = utils.majority_vote(table)
                ut = utils.unique_table(self.table)
Example #26
    def load(self, fpath):
        from exprparser import parse

        with open(os.path.join(config.input_directory, fpath), "rb") as f:
            reader = csv.reader(f)
            lines = skip_comment_cells(strip_rows(reader))
            header = lines.next()
            self.expressions = [parse(s, autovariables=True) for s in header]
            table = []
            for line in lines:
                if any(value == "" for value in line):
                    raise Exception("empty cell found in %s" % fpath)
                table.append([eval(value) for value in line])
        ndim = len(header)
        unique_last_d, dupe_last_d = unique_duplicate(table.pop(0))
        if dupe_last_d:
            print(
                "Duplicate column header value(s) (for '%s') in '%s': %s"
                % (header[-1], fpath, ", ".join(str(v) for v in dupe_last_d))
            )
            raise Exception(
                "bad alignment data in '%s': found %d " "duplicate column header value(s)" % (fpath, len(dupe_last_d))
            )

        # strip the ndim-1 first columns
        headers = [[line.pop(0) for line in table] for _ in range(ndim - 1)]

        possible_values = [list(unique(values)) for values in headers]
        if ndim > 1:
            # having duplicate values is normal when there are more than 2
            # dimensions but we need to test whether there are duplicates of
            # combinations.
            dupe_combos = list(duplicates(zip(*headers)))
            if dupe_combos:
                print("Duplicate row header value(s) in '%s':" % fpath)
                print(PrettyTable(dupe_combos))
                raise Exception(
                    "bad alignment data in '%s': found %d " "duplicate row header value(s)" % (fpath, len(dupe_combos))
                )

        possible_values.append(unique_last_d)
        self.possible_values = possible_values
        self.probabilities = list(chain.from_iterable(table))
        num_possible_values = prod(len(values) for values in possible_values)
        if len(self.probabilities) != num_possible_values:
            raise Exception(
                "incoherent alignment data in '%s': %d data cells "
                "found while it should be %d based on the number "
                "of possible values in headers (%s)"
                % (
                    fpath,
                    len(self.probabilities),
                    num_possible_values,
                    " * ".join(str(len(values)) for values in possible_values),
                )
            )
Example #27
def pl_resolve(ci, cj):
    clauses = []
    for di in disjuncts(ci):
        for dj in disjuncts(cj):
            if di == ~dj or ~di == dj:
                dnew = unique(
                    removeall(di, disjuncts(ci)) +
                    removeall(dj, disjuncts(cj)))
                clauses.append(associate('|', dnew))
    return clauses
Example #28
def pl_resolve(ci, cj):
    """Return all clauses that can be obtained by resolving clauses ci and cj."""
    clauses = []
    for di in disjuncts(ci):
        for dj in disjuncts(cj):
            if di == ~dj or ~di == dj:
                dnew = unique(removeall(di, disjuncts(ci)) +
                              removeall(dj, disjuncts(cj)))
                clauses.append(associate('|', dnew))
    return clauses
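A worked sketch for Examples #27 and #28 (aima-python conventions assumed: expr builds logical expressions and | is disjunction):

from utils import expr  # aima-python's expression helper (assumed import path)

print(pl_resolve(expr('A | ~B'), expr('B | C')))
# the complementary pair ~B / B cancels, leaving the resolvent A | C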
Example #29
def compute_meta_data(dataset, *datasets):
    datasets = [dataset] + list(datasets)
    uniq = OrderedDict()
    uniq['title_event_code'] = U.unique(datasets, column='title_event_code')
    uniq['title'] = U.unique(datasets, column='title')
    uniq['event_code'] = U.unique(datasets, column='event_code')
    uniq['event_id'] = U.unique(datasets, column='event_id')
    uniq['world'] = U.unique(datasets, column='world')
    uniq['type'] = U.unique(datasets, column='type')
    uniq['title_world'] = U.unique(datasets, column='title_world')
    uniq['title_type'] = U.unique(datasets, column='title_type')
    uniq['world_type'] = U.unique(datasets, column='world_type')
    asm_datasets = [ds.query('type == "Assessment"') for ds in datasets]
    uniq['assessment_titles'] = U.unique(asm_datasets, column='title')
    win_codes = {t: 4100 for t in uniq['title']}
    win_codes['Bird Measurer (Assessment)'] = 4110
    ref_ts = dataset['timestamp'].min()
    meta = {'win_codes': win_codes, 'ref_ts': ref_ts, **uniq}
    return U.named_tuple('Meta', **meta)
Example #31
    def __init__(self):
        self.loadinfo(default.groundtruth_file)
        with open(default.hand_split_file, 'r') as f:
            data = json.load(f)
        self.term_list = default.term_list
        self.num_cls = len(self.term_list)

        self.smp_idxs = [d['lesion_idx'] for d in data]
        self.labels = [[self.term_list.index(t) for t in d['expanded_terms'] if t in self.term_list] for d in data]
        self.uncertain_labels = [[] for d in data]
        self.num_smp = len(self.smp_idxs)

        self.labels = [unique(l) for l in self.labels]
        self.uncertain_labels = [unique(l) for l in self.uncertain_labels]

        print '>>>', len(self.smp_idxs), 'hand-labeled samples,',
        keep = [i for i in range(len(self.smp_idxs)) if (not self.noisy[self.smp_idxs[i]])
                and len(self.labels[i]) > 0]
        self.smp_idxs = [self.smp_idxs[i] for i in keep]
        self.labels = [self.labels[i] for i in keep]
        self.uncertain_labels = [self.uncertain_labels[i] for i in keep]
        print 'num of positive labels:', np.hstack(self.labels).shape[0]
        print 'num of uncertain labels:', np.hstack(self.uncertain_labels).shape[0]

        if default.generate_features_all:
            self.smp_idxs = range(len(self.filenames))
            self.labels = [[0] for _ in self.smp_idxs]
            self.uncertain_labels = [[0] for _ in self.smp_idxs]
            print 'Fake evaluation, generating features for all 32735 lesions'

        self.num_smp = len(self.smp_idxs)
        print self.num_smp, 'after removing noisy and empty ones:',

        all_labels = [lb for lbs in self.labels for lb in lbs]
        print self.num_cls, 'classes'
        self.cls_sz = np.array([all_labels.count(cls) for cls in range(self.num_cls)], dtype=np.float32)
        if self.num_cls < 10:
            print 'number of positive samples:'
            for cls in range(self.num_cls):
                print self.term_list[cls], int(self.cls_sz[cls])
Example #32
def fetch_articles():
    from api import insert_or_update
    bbc = Juicer()
    articles = bbc.articles(after=str(date.today() - timedelta(3)))
    # Get related articles from the past 90 days
    after = str(date.today() - timedelta(90))
    related = [bbc.articles(reference=article, after=after, sleep=1)
               for article in articles]
    res = insert_or_update(list(unique(list(flatten(related)) + articles,
                                       key=lambda a: a['cps_id'])))
    print json.dumps(res, default=json_util.default, indent=2)
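Several examples (#32, #41, and the packages view further below) call unique with a key function; a sketch of the key-aware, order-preserving generator this implies (an assumption; the real helper may differ):

def unique(iterable, key=lambda x: x):
    # yield the first item seen for each key value, preserving input order
    seen = set()
    for item in iterable:
        k = key(item)
        if k not in seen:
            seen.add(k)
            yield item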
Example #33
    def _modified_fields(self):
        fnames = self.predictors
        if not fnames:
            return []

        fnames.insert(0, "id")
        temp = self.entity.temp_variables
        array = self.entity.array
        length = len(array)

        fields = [(k, temp[k] if k in temp else array[k]) for k in utils.unique(fnames)]
        return [(k, v) for k, v in fields if isinstance(v, np.ndarray) and v.shape == (length,)]
Example #34
def entropy(table):
    '''
        Calculate the entropy of a set of instances.
    '''
    e = 0
    classes = [x[-1] for x in table]
    c_types = utils.unique(classes)
    for c in c_types:
        c_ratio = sum([1 for x in classes if x == c]) / len(classes)
        if c_ratio != 0:
            e += c_ratio * math.log(c_ratio, 2)
    return -e
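A quick check of Example #34's entropy on a 3-row table whose class column is ['yes', 'yes', 'no'] (Python 3 division assumed):

table = [['sunny', 'yes'], ['rainy', 'yes'], ['rainy', 'no']]
print(entropy(table))  # -(2/3*log2(2/3) + 1/3*log2(1/3)) ~= 0.918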
Example #35
def read_data(x, y, g, split):
    with h5py.File(g, "r") as g_file:
        g_idx, g = utils.decode(g_file["protein_id"][...]), \
            g_file["mat_bool"][...]  # Confident that g is unique
    if x is not None:
        with h5py.File(x, "r") as x_file:
            x_idx, x = utils.unique(utils.decode(x_file["protein_id"][...]),
                                    x_file["mat"])
    else:
        x_idx, x = g_idx, np.eye(g.shape[0])
    with h5py.File(y, "r") as y_file:
        y_idx, y = utils.unique(utils.decode(y_file["protein_id"][...]),
                                y_file["mat"])
    xg_idx = np.intersect1d(x_idx, g_idx)

    x = x[utils.get_idx_mapper(x_idx)(xg_idx)]
    g_extract = utils.get_idx_mapper(g_idx)(xg_idx)
    g = g[g_extract[:, None], g_extract]
    y = np.concatenate(
        [y, np.zeros((1, ) + y.shape[1:], dtype=y.dtype.type)],
        axis=0)  # Fill zeros if not existing
    y = y[utils.get_idx_mapper(y_idx)(xg_idx)]

    with h5py.File(split, "r") as f:
        train_idx = utils.decode(f["train"][...])
        val_idx = utils.decode(f["val"][...])
        test_idx = utils.decode(f["test"][...])
        xyg_idx = np.intersect1d(xg_idx, y_idx)
        assert np.all(np.in1d(train_idx, xyg_idx)) \
            and np.all(np.in1d(val_idx, xyg_idx)) \
            and np.all(np.in1d(test_idx, xyg_idx))
        train_mask = np.in1d(xg_idx, train_idx)
        val_mask = np.in1d(xg_idx, val_idx)
        test_mask = np.in1d(xg_idx, test_idx)

    return utils.DataDict([
        ("x", x),
        ("y", y),
        ("protein_id", xg_idx),
    ]), g, train_mask, val_mask, test_mask
Example #36
    def fit(self):
        best = 0
        log = open(self.out + '.log', 'w')
        last_smiles = []
        last_scores = []
        interval = 250
        last_save = -1

        for epoch in range(10000):
            print('\n----------\nEPOCH %d\n----------' % epoch)
            if epoch < interval and self.memory is not None:
                self.policy_gradient(crover=None,
                                     memory=self.memory,
                                     epsilon=1e-1)
            else:
                self.policy_gradient(crover=self.crover, epsilon=self.epsilon)
            seqs = self.agent.sample(self.n_samples)
            smiles = [self.agent.voc.decode(s) for s in seqs]
            smiles = np.array(utils.canonicalize_list(smiles))
            ix = utils.unique(np.array([[s] for s in smiles]))
            smiles = smiles[ix]
            scores = self.env(smiles, is_smiles=True)

            desire = (scores.DESIRE).sum() / self.n_samples
            if self.mean_func == 'arithmetic':
                score = scores[self.env.keys].values.sum(
                ) / self.n_samples / len(self.env.keys)
            else:
                score = scores[self.env.keys].values.prod(
                    axis=1)**(1.0 / len(self.env.keys))
                score = score.sum() / self.n_samples
            valid = scores.VALID.sum() / self.n_samples

            print("Epoch: %d average: %.4f valid: %.4f unique: %.4f" %
                  (epoch, score, valid, desire),
                  file=log)
            if best < score:
                torch.save(self.agent.state_dict(), self.out + '.pkg')
                best = score
                last_smiles = smiles
                last_scores = scores
                last_save = epoch

            if epoch % interval == 0 and epoch != 0:
                for i, smile in enumerate(last_smiles):
                    score = "\t".join(
                        ['%.3f' % s for s in last_scores.values[i]])
                    print('%s\t%s' % (score, smile), file=log)
                self.agent.load_state_dict(torch.load(self.out + '.pkg'))
                self.crover.load_state_dict(torch.load(self.out + '.pkg'))
            if epoch - last_save > interval: break
        log.close()
Example #37
def info_gain(table, att_i):
    '''
        calculate information gain for one attribute
    '''
    e_start = entropy(table)
    e_new = 0
    atts = utils.unique([x[att_i] for x in table])
    t_size = len(table)
    for a in atts:
        partition = [x for x in table if x[att_i] == a]
        p_weight = len(partition) / t_size
        e_new += (entropy(partition) * p_weight)
    return e_start - e_new
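A matching check for Example #37: splitting on attribute 0 gives one pure partition ('sunny') and one mixed one ('rainy'), so the gain is the parent entropy minus the weighted child entropies:

table = [['sunny', 'yes'], ['rainy', 'yes'], ['rainy', 'no']]
print(info_gain(table, 0))  # 0.918 - (1/3*0.0 + 2/3*1.0) ~= 0.252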
Example #38
def getCurrentUserProjects(currUser):
	
	#print "Content-type:text/html"
	#print
	
	# get projects user has AT LEAST Read access to (i.e. if he is explicitly declared a Writer on a project but not declared a Reader, that's allowed)
	currReadProj = packetHandler.findMemberProjects(currUser, 'Reader')
	currWriteProj = packetHandler.findMemberProjects(currUser, 'Writer')
	publicProj = packetHandler.findAllProjects(isPrivate="FALSE")
	
	# list of Packet OBJECTS
	currUserWriteProjects = utils.unique(currReadProj + currWriteProj + publicProj)
	
	if currUser == 1:
		privateProjects = packetHandler.findAllProjects("TRUE")
		currUserWriteProjects = utils.unique(privateProjects + publicProj)
		
	uPackets = []
	
	for p in currUserWriteProjects:
		uPackets.append(p.getNumber())

	return uPackets
Example #40
    def _modified_fields(self):
        fnames = self.predictors
        if not fnames:
            return []

        fnames.insert(0, 'id')
        temp = self.entity.temp_variables
        array = self.entity.array
        length = len(array)

        fields = [(k, temp[k] if k in temp else array[k])
                  for k in utils.unique(fnames)]
        return [(k, v) for k, v in fields
                if isinstance(v, np.ndarray) and v.shape == (length, )]
Example #41
def fetch_articles():
    from api import insert_or_update
    bbc = Juicer()
    articles = bbc.articles(after=str(date.today() - timedelta(3)))
    # Get related articles from the past 90 days
    after = str(date.today() - timedelta(90))
    related = [
        bbc.articles(reference=article, after=after, sleep=1)
        for article in articles
    ]
    res = insert_or_update(
        list(
            unique(list(flatten(related)) + articles,
                   key=lambda a: a['cps_id'])))
    print json.dumps(res, default=json_util.default, indent=2)
Example #42
    def _modified_fields(self):
        fnames = [v.predictor for _, v in self.subprocesses
                  if isinstance(v, Assignment)]
        if not fnames:
            return []

        fnames.insert(0, 'id')
        temp = self.entity.temp_variables
        array = self.entity.array
        alen = len(array)

        fields = [(k, temp[k] if k in temp else array[k])
                  for k in utils.unique(fnames)]
        return [(k, v) for k, v in fields
                if isinstance(v, np.ndarray) and v.shape == (alen,)]
Example #43
def main():
    subreddits = []
    for i, offset in enumerate(range(0, 500, 100)): 
        print(offset)
        if i > 0: sleep(2)
        source = get_url('http://redditmetrics.com/top/offset/{offset}'.format(offset=offset))
        soup = BeautifulSoup(source)
        cells = soup.find_all('td', string=lambda s: s[0:3] == '/r/')
        new_subreddits = [ cell.get_text()[3:] for cell in cells ]
        subreddits += new_subreddits
        print(new_subreddits)

    subreddits[:] = unique(subreddits)
    print('Saving these subreddits:', subreddits)
    save_list(subreddits, 'subreddits.txt')
    print('Done')
Example #44
def lint_files(filenames, pass_classes, clang_args):

    def strip_dot_slash(d):
        if d.filename.startswith('./'):
            d.filename = d.filename[2:]
        return d

    def interesting_file(d):
        if os.path.isabs(d.filename):
            return False

        if d.filename.startswith('opt'):
            return False

        return True

    def mk_task(filename):
        return (filename, pass_classes, clang_args)

    diags = []
    tasks = (mk_task(f) for f in filenames)

    if PARALLEL:
        # TODO: accept -jN and/or read process count from config
        pool = Pool(processes=4)
        try:
            for r in progressbar(pool.imap(worker, tasks),
                                 length=len(filenames)):
                diags += r
        except KeyboardInterrupt:
            pool.terminate()
            return []
    else:
        for r in progressbar(imap(worker, tasks), length=len(filenames)):
            diags += r

    diags = map(strip_dot_slash, diags)
    diags = filter(interesting_file, diags)

    diags = sorted(diags, key=lambda d: d.line_number)
    diags = sorted(diags, key=lambda d: d.filename)

    return unique(diags)
Example #45
    def join(self, nid):
        logging.info("%032x is Joining, Currently at %032x", 
                      nid.int_id, self.node.int_id)

        next_node = self.router.route(nid, False)
        if next_node.id == self.node.id:
            nodes = self.router.get_nodes()
        else:
            try:
                nodes = WDHTClient(next_node.ip, next_node.port).join(nid)
            except TTransportException:
                logging.info("Call to %032x failed", next_node.int_id)
                self.router.remove([next_node])
                return self.join(nid)
 
        nodes.append(self.node)
        self.router.update([nid])

        return unique(nodes)
Example #46
def packages(version_class, request):
    package_versions = (version_class.objects.filter(package__public=True,
                                                     publish=True)
                        .order_by('package__name', '-major', '-minor',
                                  '-micro'))
    latest_versions = unique(package_versions, key=lambda v: v.package.name,
                             sort=False)
    uri = request.build_absolute_uri()
    data = {'packages': dict([(v.package.name,
                               {'latest_version': '%s.%s.%s' % (v.major,
                                                                v.minor,
                                                                v.micro),
                                'latest_url':
                                request.build_absolute_uri(v.url()),
                                'package_url': uri.replace('json/', '%s/json/'
                                                           % v.package.name)})
                              for v in latest_versions])}
    # Validation requires Python 2.7+
    # jsonschema.validate(data, PACKAGES_SCHEMA)
    return HttpResponse(json.dumps(data), content_type='application/json')
Example #47
    def display_experiment_information(self, mediadir, experiment_data):
        """Spawn a gui that analyses the experiment file consistency."""
        mediafiles = []

        # loop (dictlist) sections -> gather all wanted info
        section_rows = []
        for section in experiment_data:
            section_rows.append(["blue", section["name"]])

            # experiment group information
            if "experiment_group" in section:
                section_rows.append(["bold", "\texperiment_group: " +
                                     section["experiment_group"]])

            # phase-level
            for phase in section["trial"]:
                dur = "duration:Inf"
                if "duration" in phase:
                    dur = "duration:" + str(phase["duration"])

                gc = ""
                if "gc_aois" in phase:
                    gc = "Gc:" + str(phase["gc_aois"]) + ","

                section_rows.append("\t" + phase["tag"] + ", " + dur + ", " +
                                    gc + " stims:" +
                                    str(len(phase["stimuli"])))

            # media-information
            media = utils.get_list_from_dict(section, "images") +\
                utils.get_list_from_dict(section, "movies") +\
                utils.get_list_from_dict(section, "sounds")
            for mf in media:
                mediafiles.append(os.path.join(section["mediafolder"], mf))

        medialist = utils.unique(mediafiles)

        self.text_dialog([["h1", "Sections:"], str(len(experiment_data)), "",
                         ["h1", "Section summaries:"]] + section_rows + ["",
                         ["h1", "Media dependency:"]] +
                         utils.is_file_in_filetree(mediadir, medialist))
Example #48
    def prv_maintain_routing(self):
        """
        Performs periodic routing maintenance by pinging
        """
        logging.info("Maintaining Routing Table")
        neighbors = self.router.routing_table.get_nodes()
        isDead = []
        for node in neighbors:
            try:
                logging.debug("Going to ping %032x", node.int_id)
                WDHTClient(node.ip, node.port).ping()
            except TTransportException as e:
                isDead.append(node)
                logging.exception(e)
        isDead = unique(isDead)
        self.router.remove(isDead)
        missing_regions = self.router.routing_table.get_missing_regions()
        for r, val in missing_regions.iteritems():
            logging.info("I am %032x. Going to call maintain on %032x, with length %s",
                         self.node.int_id, val, len(NodeID.to_id(val)))
            self.maintain(NodeID.to_id(val), self.node)
        logging.info("Done Maintaining Routing Table")
        self.router.routing_table.debug()
Example #49
def fullrights(fp, rights, r=True):
    ret = copy.deepcopy(rights)
    implied = {}
    for m in fp.server.modules:
        for ir in m.implicitrights:
            if ir not in implied:
                implied[ir] = []
            implied[ir] += m.implicitrights[ir]
    for right in rights:
        if splitchannel(right):
            gright = '%,' + splitchannel(right)[1]
            if gright in implied:
                for imp in implied[gright]:
                    imp_right = imp.replace('%', splitchannel(right)[0])
                    if ('-' + imp_right.strip('-')) not in ret:
                        ret.append(imp_right)
        else:
            if right in implied:
                for implication in implied[right]:
                    if splitchannel(implication) and fp.type == '':
                        if not fp.external():
                            for channel in fp.server.channels:
                                if 'names' in channel and fp.sp.sendernick in channel['names']:
                                    chan_right = (channel['channel'] + ',' +
                                                  splitchannel(implication)[1])
                                    if ('-' + chan_right.strip('-')) not in ret:
                                        ret.append(chan_right)
                    else:
                        if ('-' + implication.strip('-')) not in ret:
                            ret.append(implication)
    if r:
        oldrights = rights
        for i in range(10):
            if oldrights != ret:
                oldrights = copy.deepcopy(ret)
                ret = fullrights(fp, ret, False)
    return utils.unique(ret)
Example #50
    def decide(self, process):
        handler = lambda ev: filter(bool, ensure_iter(self.handle_event(ev, process)))
        decisions = map(handler, process.unseen_events())  # list of lists of decisions
        return unique(flatten(decisions))
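Example #50 depends on ensure_iter and flatten, which are not shown; hedged sketches consistent with how decide uses them:

def ensure_iter(x):
    # assumed: wrap a single decision (or None) so it can be filtered uniformly
    if x is None:
        return []
    return x if hasattr(x, '__iter__') else [x]

def flatten(list_of_lists):
    # one level of flattening: list of lists of decisions -> flat list
    return [item for sub in list_of_lists for item in sub]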
Example #51
def load_ndarray(fpath, celltype=None):
    print(" - reading", fpath)
    with open(fpath, "rb") as f:
        reader = csv.reader(f)
        line_stream = skip_comment_cells(strip_rows(reader))
        header = line_stream.next()
        str_table = []
        for line in line_stream:
            if any(value == '' for value in line):
                raise Exception("empty cell found in %s" % fpath)
            str_table.append(line)
    ndim = len(header)

    # handle last dimension header (horizontal values)
    last_d_header = str_table.pop(0)
    # auto-detect type of values for the last d and convert them
    last_d_pvalues = convert_1darray(last_d_header)

    unique_last_d, dupe_last_d = unique_duplicate(last_d_pvalues)
    if dupe_last_d:
        print(("Duplicate column header value(s) (for '%s') in '%s': %s"
              % (header[-1], fpath,
                 ", ".join(str(v) for v in dupe_last_d))))
        raise Exception("bad data in '%s': found %d "
                        "duplicate column header value(s)"
                        % (fpath, len(dupe_last_d)))

    # handle other dimensions header

    # strip the ndim-1 first columns
    headers = [[line.pop(0) for line in str_table]
               for _ in range(ndim - 1)]
    headers = [convert_1darray(pvalues_str) for pvalues_str in headers]
    if ndim > 1:
        # having duplicate values is normal when there are more than 2
        # dimensions but we need to test whether there are duplicates of
        # combinations.
        dupe_combos = list(duplicates(zip(*headers)))
        if dupe_combos:
            print(("Duplicate row header value(s) in '%s':" % fpath))
            print((PrettyTable(dupe_combos)))
            raise Exception("bad alignment data in '%s': found %d "
                            "duplicate row header value(s)"
                            % (fpath, len(dupe_combos)))

    possible_values = [np.array(list(unique(pvalues))) for pvalues in headers]
    possible_values.append(np.array(unique_last_d))

    shape = tuple(len(values) for values in possible_values)
    num_possible_values = prod(shape)

    # transform the 2d table into a 1d list
    str_table = list(chain.from_iterable(str_table))
    if len(str_table) != num_possible_values:
        raise Exception("incoherent data in '%s': %d data cells "
                        "found while it should be %d based on the number "
                        "of possible values in headers (%s)"
                        % (fpath,
                           len(str_table),
                           num_possible_values,
                           ' * '.join(str(len(values))
                                      for values in possible_values)))

    #TODO: compare time with numpy built-in conversion:
    # if dtype is None, numpy tries to detect the best type itself
    # which it does a good job of if the values are already numeric values
    # if dtype is provided, numpy does a good job to convert from string
    # values.
    if celltype is None:
        celltype = detect_column_type(str_table)
    data = convert_1darray(str_table, celltype)
    array = np.array(data, dtype=celltype)
    return LabeledArray(array.reshape(shape), header, possible_values)
Example #52
    def get_nodes(self):
        return unique(self.routing_table.get_nodes() +
                      self.neighbor_set.get_neighbors())
Example #53
    def candidates(self):
        return unique([self.node] + self.get_nodes())
Example #54
    def get_nodes(self):
        return unique(self.table.values())
Example #55
    def __init__(self, frange):
        # if the user provides anything but a string, short-circuit the build
        if not isinstance(frange, basestring):
            # if it's apparently a FrameSet already, short-circuit the build
            if set(dir(frange)).issuperset(self.__slots__):
                for attr in self.__slots__:
                    setattr(self, attr, getattr(frange, attr))
                return
            # if it's inherently disordered, sort and build
            elif isinstance(frange, Set):
                self._items = frozenset(map(int, frange))
                self._order = tuple(sorted(self._items))
                self._frange = FrameSet.framesToFrameRange(
                    self._order, sort=False, compress=False)
                return
            # if it's ordered, find unique and build
            elif isinstance(frange, Sequence):
                items = set()
                order = unique(items, map(int, frange))
                self._order = tuple(order)
                self._items = frozenset(items)
                self._frange = FrameSet.framesToFrameRange(
                    self._order, sort=False, compress=False)
                return
            # in all other cases, cast to a string
            else:
                try:
                    frange = str(frange)
                except Exception as err:
                    msg = 'Could not parse "{0}": cast to string raised: {1}'
                    raise ParseException(msg.format(frange, err))

        # we're willing to trim padding characters from consideration
        # this translation is orders of magnitude faster than prior method
        self._frange = str(frange).translate(None, ''.join(PAD_MAP.keys()))

        # because we're acting like a set, we need to support the empty set
        if not self._frange:
            self._items = frozenset()
            self._order = tuple()
            return

        # build the mutable stores, then cast to immutable for storage
        items = set()
        order = []

        for part in self._frange.split(","):
            # this is to deal with leading / trailing commas
            if not part:
                continue
            # parse the partial range
            start, end, modifier, chunk = FrameSet._parse_frange_part(part)
            # handle batched frames (1-100x5)
            if modifier == 'x':
                frames = xfrange(start, end, chunk)
                frames = [f for f in frames if f not in items]
                order.extend(frames)
                items.update(frames)
            # handle staggered frames (1-100:5)
            elif modifier == ':':
                for stagger in xrange(chunk, 0, -1):
                    frames = xfrange(start, end, stagger)
                    frames = [f for f in frames if f not in items]
                    order.extend(frames)
                    items.update(frames)
            # handle filled frames (1-100y5)
            elif modifier == 'y':
                not_good = frozenset(xfrange(start, end, chunk))
                frames = xfrange(start, end, 1)
                frames = (f for f in frames if f not in not_good)
                frames = [f for f in frames if f not in items]
                order.extend(frames)
                items.update(frames)
            # handle full ranges and single frames
            else:
                frames = xfrange(start, end, 1 if start < end else -1)
                frames = [f for f in frames if f not in items]
                order.extend(frames)
                items.update(frames)

        # lock the results into immutable internals
        # this allows for hashing and fast equality checking
        self._items = frozenset(items)
        self._order = tuple(order)
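A self-contained sketch of the frange modifiers handled above, with a simple stand-in for the project's xfrange helper (assumed behaviour: an inclusive integer range):

def xfrange(start, end, step=1):
    # inclusive integer range; a negative step gives a descending range
    return range(start, end + (1 if step > 0 else -1), step)

print(list(xfrange(1, 10, 2)))    # 'x' (batched): [1, 3, 5, 7, 9]
skip = set(xfrange(1, 10, 3))     # 'y' (filled) keeps frames *off* the x3 lattice
print([f for f in xfrange(1, 10) if f not in skip])   # [2, 3, 5, 6, 8, 9]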
Example #56
0
def get_optimal_kegs(args):
    ''' Gets kegs from bevmo.com and finds the kegs with the best
        gallons-of-alcohol per USD ratio
    '''
    num_kegs = args['top']
    beer_limit = args['limit']
    num_attempts = args['attempts']
    max_price = args['price']
    desc_filter = args['filter']
    desc_unfilter = args['unfilter']

    ''' The first url to crawl and its base url '''
    seed_url = ('http://www.bevmo.com/Shop/ProductList.aspx/'
                'Beer/Kegs/_/N-15Z1z141vn?DNID=Beer')
    base_url = '{url.scheme}://{url.netloc}'.format(url=urlparse(seed_url))

    ''' Get initial unique page links from the seed url
        append base_url to them
    '''

    '''     For info on XPaths, see:
            http://www.w3schools.com/xpath/xpath_syntax.asp
    '''
    init_page_links = []
    init_page_links[:] = unique(get_html(seed_url).xpath(
        '//div[@class="ProductListPaging"]/a/@href'))

    if not init_page_links:
        print('Failed to retrieve the initial keg page links!')
        return None

    ''' Lists for holding links to pages of beer kegs '''
    page_links = [seed_url] + [base_url + x for x in init_page_links]
    new_page_links = []

    ''' Lists for holding links to individual beer kegs '''
    beer_links = []
    new_beer_links = []

    ''' To keep track of already crawled beer kegs '''
    crawled_beers = set()

    ''' List for matching --filter and --unfilter keyword arguments to
        keg descriptions
    '''
    matched = []

    ''' List to hold top beer kegs, the size of optimal_kegs is limited by the
        num_kegs argument
    '''
    optimal_kegs = []

    keg = None
    while len(page_links) > 0 and len(crawled_beers) < beer_limit:
        ''' Links are removed as they are crawled '''
        page_link = page_links.pop(0)

        ''' Beer keg links '''
        new_beer_links[:] = unique(get_html(page_link).xpath(
            '//a[@class="ProductListItemLink"]/@href'))
        beer_links += [base_url + x for x in new_beer_links]

        ''' Crawl the beer keg links
            get the gallons of alcohol/USD ratio
        '''
        for link in beer_links:
            ''' Break if the number of crawled beers exceeds the limit '''
            if len(crawled_beers) >= beer_limit:
                break

            ''' Cache the BevMo beer id's to prevent duplicates '''
            beer_id = link.split('/')[-1]

            if beer_id not in crawled_beers:
                ''' Create BeerKeg object '''
                keg = BeerKeg(link, num_attempts, verbose=True)

                ''' Call keg.parse() then filter kegs by their descriptions
                    Calling keg.parse() produces fields keg.desc, keg.price, etc
                    keg.parse() will only parse once per keg object
                '''

                ''' Check if price is within range if one was given '''
                if max_price:
                    keg.parse()

                    if keg.price > max_price:
                        ''' Move onto the next keg and ignore this one '''
                        continue

                ''' desc_filter has words that must be in the description '''
                if desc_filter:
                    keg.parse()

                    matched = [word in keg.desc for word in desc_filter]

                    ''' All keywords must be present for a match '''
                    if not all(matched):
                        ''' Move onto the next keg and ignore this one '''
                        continue

                ''' desc_unfilter has words that can't be in the description '''
                if desc_unfilter:
                    keg.parse()

                    matched = [word in keg.desc for word in desc_unfilter]

                    ''' Any one keyword present nullifies the match '''
                    if any(matched):
                        ''' Move onto the next keg and ignore this one '''
                        continue

                ''' Add current beer to crawled beers '''
                crawled_beers.add(beer_id)

                ''' Print how many kegs have been crawled '''
                print('Keg {}'.format(len(crawled_beers)))

                ''' Gets the gallons of alcohol per USD for the keg '''
                ratio = keg.get_ratio()

                print('')

                ''' Maintain the current top num_kegs kegs in a heap using
                    heapq (heap queue algorithm)

                    optimal_kegs holds (ratio, keg) tuples, so the smallest
                    ratio always sits at index 0
                '''
                if optimal_kegs:
                    for opt_tuple in optimal_kegs:
                        ''' If ratio is greater than any keg ratio currently
                            in optimal_kegs, then add it
                        '''
                        if ratio > opt_tuple[0]:
                            if len(optimal_kegs) >= num_kegs:
                                ''' Adds new item to list
                                    removes the smallest to maintain size
                                '''
                                heapq.heappushpop(optimal_kegs, (ratio, keg))
                            else:
                                heapq.heappush(optimal_kegs, (ratio, keg))
                            break
                else:
                    ''' Will only occur for the very first keg crawled '''
                    heapq.heappush(optimal_kegs, (ratio, keg))

        ''' Typical link: Shop/ProductList.aspx/_/N-15Z1z141vn/No-100?DNID=Beer

            If No- is evenly divisible by 100, it leads to more pages to add
        '''
        if 'No-' in page_link:
            if int(page_link.split('No-')[1].split('?')[0]) % 100 == 0:
                ''' Unique new page links with their base url appended '''
                new_page_links[:] = unique(get_html(page_link).xpath(
                    '//div[@class="ProductListPaging"]/a/@href'))
                page_links += [base_url + x for x in new_page_links]

    ''' Sort the list in descending order by ratio
        (index 0 in the keg tuple)
    '''
    return sorted(optimal_kegs, key=lambda x: x[0], reverse=True)
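The keg loop above keeps a bounded best-N list; the standard idiom it approximates is a fixed-size min-heap, sketched here in isolation (names are illustrative):

import heapq

def push_top_n(heap, item, n):
    # keep the n largest items; the smallest always sits at heap[0]
    if len(heap) < n:
        heapq.heappush(heap, item)
    elif item > heap[0]:
        heapq.heappushpop(heap, item)

top = []
for ratio in [0.4, 0.9, 0.1, 0.7, 0.5]:
    push_top_n(top, ratio, 3)
print(sorted(top, reverse=True))   # [0.9, 0.7, 0.5]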
Example #57
0
def FoodListView(request, username):

    food_list_serialized = []
    user = User.objects.get(username=username)

    food_list = Food.objects.all().select_related('user', 'restaurant', 'food')

    if request.GET.get('search', False):
        query_string = request.GET.get('search', False)
        food_list = food_list.filter(Q(name__icontains=query_string) | Q(description__icontains=query_string))
    if request.GET.get('liked', False):
        food_list = food_list.filter(foods_liked__in=[user]).order_by('-id')
    if request.GET.get('friends_like', False):
        friends = [u['id'] for u in user.following.values('id')]
        food_list = food_list.filter(foods_liked__in=friends).order_by('-id')
    if request.GET.get('recommended', False):
        restaurants = Restaurant.objects.filter(is_recommended=True)
        food_list = food_list.filter(restaurant__in=restaurants).order_by('-id')
    if request.GET.get('following', False):
        restaurants = Restaurant.objects.filter(restaurants_following__in=[user])
        food_list = food_list.filter(restaurant__in=restaurants).order_by('-id')
    if request.GET.get('friends_following', False):
        friends = user.following.all()
        restaurants = Restaurant.objects.filter(restaurants_following__in=friends)
        food_list = food_list.filter(restaurant__in=restaurants).order_by('-id')
    if request.GET.get('disliked', False):
        food_list = food_list.filter(foods_disliked__in=[user]).order_by('-id')
    if request.GET.get('explore', False):
        food_list = food_list.exclude(foods_liked__in=[user]).exclude(foods_disliked__in=[user]).order_by('id')
    if request.GET.get('dietary_ids', False):
        dietary_ids = request.GET.get('dietary_ids', False).split(',')
        food_list = food_list.filter(dietary__in=dietary_ids)
    if request.GET.get('cuisine_ids', False):
        cuisine_ids = request.GET.get('cuisine_ids', False).split(',')
        food_list = food_list.filter(cuisine__in=cuisine_ids)

    # filter by range
    if request.GET.get('price_max', False):
        price_max = request.GET.get('price_max', False)
        food_list = food_list.filter(price__lte=int(price_max))
    if request.GET.get('price_min', False):
        price_min = request.GET.get('price_min', False)
        food_list = food_list.filter(price__gte=int(price_min))

    distance_max = request.GET.get('distance_max', False)
    if distance_max:
        distance_max = float(distance_max)
    distance_min = request.GET.get('distance_min', False)
    if distance_min:
        distance_min = float(distance_min)

    #sorting by non-derived field
    sort = request.GET.get('sort', False)
    if sort == 'price':
        food_list = food_list.order_by('price')

    food_list = unique(food_list)

    for food in food_list:

        food_obj = {}
        food_obj['id'] = food.id
        food_obj['name'] = food.name
        # food_obj['description'] = food.description
        food_obj['price'] = '${0:0.2f}'.format(food.price)

        food_obj['dist'] = haversine(float(user.location_x), float(user.location_y), float(food.restaurant.location_x), float(food.restaurant.location_y))
        food_obj['distance'] = '{0:0.2f}km'.format(food_obj['dist'])

        # if a distance filter has been set, we only add qualifying restaurants
        if distance_max:
            if food_obj['dist'] > distance_max:
                continue
        if distance_min:
            if food_obj['dist'] < distance_min:
                continue

        food_obj['photo'] = food.photo
        food_obj['restaurant'] = food.restaurant.name
        food_obj['restaurant_id'] = food.restaurant.id
        food_obj['dietary_ids'] = [{'id':i.id, 'name':i.name} for i in food.dietary.all()]
        food_obj['cuisine_ids'] = [{'id':i.id, 'name':i.name} for i in food.cuisine.all()]

        food_obj['is_liked'] = food in user.foods_liked.all()
        food_obj['num_likes'] = User.objects.filter(foods_liked__in=[food]).count()

        food_list_serialized.append(food_obj)

    # sorting by derived field
    if sort == 'likes':
        food_list_serialized = sorted(food_list_serialized, key=lambda x: x['num_likes'], reverse=True)
    elif sort == 'location':
        food_list_serialized = sorted(food_list_serialized, key=lambda x: x['dist'])

    return HttpResponse(json.dumps(food_list_serialized), content_type="application/json")
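FoodListView relies on a `haversine` helper for its distance fields; a standard great-circle sketch in kilometres, assuming the argument order matches the (location_x, location_y) pairs in the calls above:

from math import asin, cos, radians, sin, sqrt

def haversine(lat1, lon1, lat2, lon2):
    # convert degrees to radians, then apply the haversine formula
    lat1, lon1, lat2, lon2 = map(radians, (lat1, lon1, lat2, lon2))
    a = sin((lat2 - lat1) / 2) ** 2 + cos(lat1) * cos(lat2) * sin((lon2 - lon1) / 2) ** 2
    return 2 * 6371 * asin(sqrt(a))   # mean Earth radius ~6371 km

print(haversine(52.52, 13.405, 48.8566, 2.3522))   # Berlin -> Paris, ~878 km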
Example #58
0
def RestaurantsListView(request, username):

    user = User.objects.get(username=username)
    user_restaurants_ids = [r.id for r in user.restaurants_following.all()]

    restaurants = Restaurant.objects.all().select_related('user', 'restaurant', 'food')
    restaurants_list = []

    if request.GET.get('search', False):
        query_string = request.GET.get('search', False)
        restaurants = restaurants.filter(Q(name__icontains=query_string) | Q(description__icontains=query_string) | Q(location_name__icontains=query_string))
    if request.GET.get('following', False):
        restaurants = restaurants.filter(restaurants_following__in=[user])
    if request.GET.get('friends_following', False):
        friends = [u['id'] for u in user.following.values('id')]
        restaurants = restaurants.filter(restaurants_following__in=friends)
    if request.GET.get('recommended', False):
        restaurants = restaurants.filter(is_recommended=True)
    if request.GET.get('me_like', False):
        restaurant_ids = user.foods_liked.values('restaurant__id')
        seen = set()
        unique_rids = [r['restaurant__id'] for r in restaurant_ids if r['restaurant__id'] not in seen and not seen.add(r['restaurant__id'])]
        restaurants = restaurants.filter(id__in=unique_rids)
    if request.GET.get('friends_like', False):
        restaurant_ids = user.following.values('foods_liked__restaurant__id')
        seen = set()
        unique_rids = [r['foods_liked__restaurant__id'] for r in restaurant_ids if r['foods_liked__restaurant__id'] not in seen and not seen.add(r['foods_liked__restaurant__id'])]
        restaurants = restaurants.filter(id__in=unique_rids)
    if request.GET.get('me_review', False):
        restaurant_ids = Review.objects.filter(user=user).values('restaurant__id')
        seen = set()
        unique_rids = [r['restaurant__id'] for r in restaurant_ids if r['restaurant__id'] not in seen and not seen.add(r['restaurant__id'])]
        restaurants = restaurants.filter(id__in=unique_rids)
    if request.GET.get('recommended_people_review', False):
        users = User.objects.filter(is_recommended=True)
        restaurant_ids = Review.objects.filter(user__in=users).values('restaurant__id')
        seen = set()
        unique_rids = [r['restaurant__id'] for r in restaurant_ids if r['restaurant__id'] not in seen and not seen.add(r['restaurant__id'])]
        restaurants = restaurants.filter(id__in=unique_rids)
    if request.GET.get('friends_review', False):
        friends = user.following.all()
        restaurant_ids = Review.objects.filter(user__in=friends).values('restaurant__id')
        seen = set()
        unique_rids = [r['restaurant__id'] for r in restaurant_ids if r['restaurant__id'] not in seen and not seen.add(r['restaurant__id'])]
        restaurants = restaurants.filter(id__in=unique_rids)
    if request.GET.get('amenity_ids', False):
        amenity_ids = request.GET.get('amenity_ids', False).split(',')
        restaurants = restaurants.filter(amenities__in=amenity_ids)
    if request.GET.get('dietary_ids', False):
        dietary_ids = request.GET.get('dietary_ids', False).split(',')
        food_ids = Food.objects.filter(dietary__in=dietary_ids)
        restaurants = restaurants.filter(food__in=food_ids)
    if request.GET.get('cuisine_ids', False):
        cuisine_ids = request.GET.get('cuisine_ids', False).split(',')
        food_ids = Food.objects.filter(cuisine__in=cuisine_ids)
        restaurants = restaurants.filter(food__in=food_ids)

    # filter by range
    if request.GET.get('price_max', False):
        price_max = request.GET.get('price_max', False)
        restaurants = restaurants.filter(price_high__lte=int(price_max))
    if request.GET.get('price_min', False):
        price_min = request.GET.get('price_min', False)
        restaurants = restaurants.filter(price_low__gte=int(price_min))

    distance_max = request.GET.get('distance_max', False)
    if distance_max:
        distance_max = float(distance_max)
    distance_min = request.GET.get('distance_min', False)
    if distance_min:
        distance_min = float(distance_min)

    #sorting by non-derived field
    sort = request.GET.get('sort', False)
    if sort == 'price':
        restaurants = restaurants.extra(select={'price_range': 'price_high + price_low'}).extra(order_by=['price_range'])

    # get distinct restaurants
    restaurants = unique(restaurants)

    for restaurant in restaurants:
        restaurant_obj = {}
        restaurant_obj['name'] = restaurant.name
        restaurant_obj['id'] = restaurant.id
        restaurant_obj['location_name'] = restaurant.location_name
        restaurant_obj['location'] = {'x':restaurant.location_x, 'y':restaurant.location_y}

        restaurant_obj['dist'] = haversine(float(user.location_x), float(user.location_y), float(restaurant.location_x), float(restaurant.location_y))
        restaurant_obj['distance'] = '{0:0.2f}km'.format(restaurant_obj['dist'])

        # if a distance filter has been set, we only add qualifying restaurants
        if distance_max:
            if restaurant_obj['dist'] > distance_max:
                continue
        if distance_min:
            if restaurant_obj['dist'] < distance_min:
                continue

        restaurant_obj['photo'] = restaurant.photo
        restaurant_obj['price_low'] = '${0:0.0f}'.format(restaurant.price_low)
        restaurant_obj['price_high'] = '${0:0.0f}'.format(restaurant.price_high)
        # restaurant_obj['amenities'] = [{'id': res.id, 'image': res.image} for res in restaurant.amenities.all()]


        # get the people following this restaurant
        restaurant_obj['followed_by'] = [{'user_id':person.id, 'username': person.username, 'photo': person.photo} for person in User.objects.filter(restaurants_following__in=[restaurant])[:7]]
        restaurant_obj['following_count'] = User.objects.filter(restaurants_following__in=[restaurant]).count()

        restaurant_obj['is_following'] = (restaurant.id in user_restaurants_ids)
        restaurant_obj['is_recommended'] = restaurant.is_recommended

        # ratings
        reviews = Review.objects.filter(restaurant__in=[restaurant])
        if reviews.count():
            rating = 0
            for review in reviews:
                rating = rating + review.rating
            rating = rating / reviews.count()
        else:
            rating = 0

        restaurant_obj['rating'] = rating
        restaurant_obj['reviews_count'] = reviews.count()

        restaurants_list.append(restaurant_obj)

    # sorting by derived field
    if sort == 'followers':
        restaurants_list = sorted(restaurants_list, key=lambda x: x['following_count'], reverse=True)
    elif sort == 'location':
        restaurants_list = sorted(restaurants_list, key=lambda x: x['dist'])
    elif sort == 'ratings':
        restaurants_list = sorted(restaurants_list, key=lambda x: x['rating'])

    return HttpResponse(json.dumps(restaurants_list), content_type="application/json")
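The repeated seen-set blocks in RestaurantsListView all implement the same order-preserving dedup over a values() queryset; factored out, the idiom looks like this sketch (dedup_ids is a hypothetical helper, not in the project):

def dedup_ids(rows, key):
    # order-preserving dedup; seen.add() returns None, so the guard is truthy
    seen = set()
    return [r[key] for r in rows if r[key] not in seen and not seen.add(r[key])]

rows = [{'restaurant__id': 3}, {'restaurant__id': 1}, {'restaurant__id': 3}]
print(dedup_ids(rows, 'restaurant__id'))   # [3, 1]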
    def saveUser(self, form):

        db = self.__db
        cursor = self.__cursor
        hostname = self.__hostname

        # print "Content-type:text/html"		# TEMPORARY, REMOVE AFTER DEBUGGING TO HAVE SCRIPT REDIRECT PROPERLY!!!!!!
        # print					# DITTO
        # print `form`

        uHandler = UserHandler(db, cursor)
        lHandler = LabHandler(db, cursor)
        pHandler = ProjectDatabaseHandler(db, cursor)

        ucMapper = UserCategoryMapper(db, cursor)
        category_ID_Name_Map = ucMapper.mapCategoryIDToName()

        newProps = {}

        # Get form values
        userID = int(form.getvalue("userID"))
        newUser = uHandler.getUserByID(userID)

        labID = int(form.getvalue("labs"))
        tmpLab = lHandler.findLabByID(labID)

        # rest of user properties
        username = form.getvalue("username")
        firstName = form.getvalue("firstName")
        lastName = form.getvalue("lastName")
        description = firstName + " " + lastName
        email = form.getvalue("email")
        category = category_ID_Name_Map[int(form.getvalue("system_access_level"))]

        newProps["labID"] = labID
        newProps["username"] = username
        newProps["firstname"] = firstName
        newProps["lastname"] = lastName
        newProps["description"] = description
        newProps["email"] = email
        newProps["category"] = category

        try:
            # Now do an update on database level AND on class level:
            uHandler.updateUserProperties(userID, newProps)  # database update

            # Interface level
            newUser.setUsername(username)
            newUser.setFirstName(firstName)
            newUser.setLastName(lastName)
            newUser.setDescription(description)
            newUser.setEmail(email)
            newUser.setLab(tmpLab)
            newUser.setCategory(category)

            # update list of user's projects
            if form.has_key("userProjectsReadonly"):
                # list of IDs
                readonlyProjects = utils.unique(form.getlist("userProjectsReadonly"))
                pHandler.updateUserProjects(userID, readonlyProjects, "Reader")
            else:
                # safe to assume should delete projects?
                pHandler.deleteMemberProjects(userID, "Reader")

            if form.has_key("userProjectsWrite"):
                writeProjects = utils.unique(form.getlist("userProjectsWrite"))
                pHandler.updateUserProjects(userID, writeProjects, "Writer")
            else:
                # safe to assume should delete projects?
                pHandler.deleteMemberProjects(userID, "Writer")

                # think about this
                # newUser.setReadProjects(readProjects)
                # newUser.setWriteProjects(writeProjects)

            # return to detailed view
            self.printUserInfo("view", newUser)
            # utils.redirect(hostname + "User.php?View=3&fd=" + filename)

        except DuplicateUsernameException:

            # return to the view with input values and error message
            # Need to construct a dummy User instance to save form values for error output on the next page (otherwise they're lost as soon as Submit is pressed and creation view is exited)
            newLab = lHandler.findLabByID(labID)
            newUser = User(userID, username, firstName, lastName, description, newLab, category, email, "")

            self.printUserInfo("edit", newUser, "Dup_un")
    def addUser(self, form):

        db = self.__db
        cursor = self.__cursor
        hostname = self.__hostname
        mail_server = self.__mail_server  # August 19, 2011

        mail_programmer = self.__mail_programmer  # July 30, 2010
        mail_biologist = self.__mail_biologist
        mail_admin = self.__mail_admin

        # print "Content-type:text/html"		# TEMPORARY, REMOVE AFTER DEBUGGING TO HAVE SCRIPT REDIRECT PROPERLY!!!!!!
        # print					# DITTO
        # print `form`

        uHandler = UserHandler(db, cursor)
        lHandler = LabHandler(db, cursor)
        pHandler = ProjectDatabaseHandler(db, cursor)

        ucMapper = UserCategoryMapper(db, cursor)
        category_Name_ID_Map = ucMapper.mapCategoryNameToID()

        # Get form values
        labID = int(form.getvalue("labs"))
        username = form.getvalue("username")

        firstName = form.getvalue("firstName")
        lastName = form.getvalue("lastName")
        description = firstName + " " + lastName

        to_email = form.getvalue("email")

        from_email = mail_admin

        # Change July 30, 2010 - random password generator
        # passwd = form.getvalue("password")

        chars = string.letters + string.digits
        passwd = ""

        for i in range(10):
            passwd += choice(chars)

        # System access level: Lab default or override?
        # if form.getvalue("privChoiceRadio") == 'override':
        accessLevel = category_Name_ID_Map[form.getvalue("system_access_level")]
        # else:
        #     accessLevel = lHandler.findDefaultAccessLevel(labID)

        newProps = {}

        try:
            # Insert User information
            userID = uHandler.insertUser(
                username, firstName, lastName, description, accessLevel, to_email, passwd, labID
            )
            # newUser = uHandler.getUserByID(userID)
            tmpLab = lHandler.findLabByID(labID)
            # print tmpLab.getName()

            # Insert Project info
            # Sept. 11/07: Differentiate between user categories Reader and Writer - different field names
            if form.has_key("userProjectsReadonly"):
                # list of IDs
                readonlyProjects = utils.unique(form.getlist("userProjectsReadonly"))
                # print `readonlyProjects`
                pHandler.insertMemberProjects(userID, readonlyProjects, "Reader")

            elif form.has_key("userProjectsReadonlyWrite"):
                # list of IDs
                readonlyProjects = utils.unique(form.getlist("userProjectsReadonlyWrite"))
                # print `readonlyProjects`
                pHandler.insertMemberProjects(userID, readonlyProjects, "Reader")

            # Write projects exist only for Writers
            if form.has_key("userProjectsWrite"):
                writeProjects = utils.unique(form.getlist("userProjectsWrite"))
                pHandler.insertMemberProjects(userID, writeProjects, "Writer")

            # don't assign projects to a User instance - will retrieve them from db in output function
            newUser = User(
                userID,
                username,
                firstName,
                lastName,
                description,
                tmpLab,
                form.getvalue("system_access_level"),
                to_email,
                passwd,
                [],
                [],
            )

            email_subject = "OpenFreezer User Account"

            msg = email.MIMEMultipart.MIMEMultipart("alternative")

            msg["Subject"] = email_subject
            msg["To"] = to_email

            msgText = (
                "Hi "
                + firstName
                + ",<BR><BR>An OpenFreezer account has been created for you.&nbsp;&nbsp;Your access level is "
                + form.getvalue("system_access_level")
                + ", so you can "
            )

            if form.getvalue("system_access_level") == "Reader":
                msgText += "search for clones.&nbsp;&nbsp;If you wish to add/modify reagents or create projects, please contact the administrator to upgrade your access level.<BR>"

            elif form.getvalue("system_access_level") == "Writer":
                msgText += "search, add, and modify reagents.&nbsp;&nbsp;If you wish to create projects, please contact the administrator to upgrade your access level.<BR>"

            elif form.getvalue("system_access_level") == "Creator":
                msgText += "search for clones, add and modify reagents, as well as create your own projects.<BR>"

                #####################################################
                # CHANGE TEXT AS NEEDED
                #####################################################

            msgText += (
                "<BR>The URL to access the system is <a href='"
                + hostname
                + "'>"
                + hostname
                + "</a>.&nbsp;&nbsp;Your username is <b>"
                + username
                + "</b>, and your temporary password is <b>"
                + passwd
                + "</b>.&nbsp;&nbsp;Please <u>change the temporary password as soon as you log into the website</u> - you can do it through the 'Change your password' link under the 'User Management' menu section.<BR><BR>Please refer to http://openfreezer.org for additional support.<BR><BR>Sincerely,<BR>OpenFreezer  support team.<BR><BR><span style='font-family:Courier; font-size:10pt;'><HR>This is an automatically generated e-mail message.&nbsp;&nbsp;Please do not reply to this e-mail.&nbsp;&nbsp;All questions should be directed to your local administrator.</span>"
            )

            msgText = email.MIMEText.MIMEText(msgText, "html")
            msg.attach(msgText)

            server = smtplib.SMTP(mail_server)
            server.set_debuglevel(1)

            server.sendmail(from_email, [to_email], msg.as_string())
            server.quit()

            self.printUserInfo("view", newUser)

        except DeletedUserException:

            # Without asking too many questions, reactivate the deleted user and overwrite his/her attributes with the form input values
            userID = uHandler.findUserIDByUsername(username)

            newProps["firstname"] = firstName
            newProps["lastname"] = lastName
            newProps["description"] = description
            newProps["email"] = email
            newProps["status"] = "ACTIVE"
            newProps["password"] = passwd

            # Insert new database values and create new object
            uHandler.updateUserProperties(userID, newProps)  # database update
            newUser = uHandler.getUserByID(userID)

            # Insert Project info
            readProjects = []
            writeProjects = []

            if form.has_key("userProjectsReadonly"):
                # list of IDs
                readonlyProjects = form.getlist("userProjectsReadonly")

                for r in readonlyProjects:
                    pHandler.addProjectMember(r, userID, "Reader")

                    # tmpReadProject = pHandler.findPacket(r)
                    # readProjects.append(tmpReadProject)
                    # newUser.addProject(tmpReadProject, 'read')

            if form.has_key("userProjectsWrite"):
                writeProjects = form.getlist("userProjectsWrite")

                for w in writeProjects:
                    pHandler.addProjectMember(w, userID, "Writer")

                    # tmpWriteProject = pHandler.findPacket(w)
                    # writeProjects.append(tmpWriteProject)
                    # newUser.addProject(tmpWriteProject, 'write')

                    # newUser.setReadProjects(readProjects)
                    # newUser.setWriteProjects(writeProjects)

            self.printUserInfo("view", newUser)
            # utils.redirect(hostname + "User.php?View=3&fd=" + filename)

        except DuplicateUsernameException:

            # return to the view with input values and error message
            # Need to construct a dummy User instance to save form values for error output on the next page (otherwise they're lost as soon as Submit is pressed and creation view is exited)
            newLab = lHandler.findLabByID(labID)
            newUser = User(0, username, firstName, lastName, description, newLab, "", email, passwd)

            self.printUserInfo("create", newUser)