def combineCSV(env):
    featureVectorLoc = conf[env]["feature_vector"]
    csvDirectory = os.path.dirname(featureVectorLoc)
    counter = True  # build secondDict only from the 2nd feature CSV file; later files extend it

    for name in glob.glob(csvDirectory + "/" + env + "Feature?.csv"):
        if os.path.basename(name) == env + "Feature1.csv":
            r = csv.reader(open(name, "rb"))
            mainDict = OrderedDict((row[0], row[1:]) for row in r)
        else:
            r = csv.reader(open(name, "rb"))
            if counter:
                secondDict = dict({row[0]: row[1:] for row in r})
                counter = False
            else:
                for keys, values in dict({row[0]: row[1:] for row in r}).iteritems():
                    secondDict[keys].extend(values[0:])

    result = OrderedDict()
    for d in (mainDict, secondDict):
        for key, value in d.iteritems():
            result.setdefault(key, []).extend(value)

    # write the combined CSV file to the feature vector location
    with open(featureVectorLoc, "w+") as f:
        w = csv.writer(f)
        for key, value in result.iteritems():
            w.writerow([key] + value)
Example #2
class Chrono:
    def __init__(self):
        self.timings = OrderedDict()

    def measure(self, what):
        return _ChronoCM(lambda t: self._done(what, t))

    def _done(self, what, t):
        self.timings.setdefault(what, []).append(t)

    def times(self, what):
        return self.timings[what]

    def avgtime(self, what, dropfirst=False):
        timings = self.timings[what]
        if dropfirst and len(timings) > 1:
            timings = timings[1:]
        return sum(timings) / len(timings)

    def __str__(self, fmt="{}"):
        avgtimes = {k: self.avgtime(k) for k in self.timings}
        l = max(map(len, avgtimes))
        return "\n".join(
            ("{:{l}s}: " + fmt).format(k, v, l=l) for k, v in sorted(avgtimes.items(), key=lambda t: t[1], reverse=True)
        )
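A minimal usage sketch for the class above, assuming _ChronoCM (not shown here) is a context manager that times its with-block and passes the elapsed seconds to the callback:

import time

chrono = Chrono()
for _ in range(3):
    with chrono.measure("sleep"):  # each exit records one timing under "sleep"
        time.sleep(0.01)
print(chrono.avgtime("sleep", dropfirst=True))  # average over the later runs
print(chrono)  # one line per key, slowest average first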
Example #3
    def _groupResults(self, results, formatter):
        """
        Group tests together based on their results.

        @param results: An iterable of tuples of two or more elements.  The
            first element of each tuple is a test case.  The remaining
            elements describe the outcome of that test case.

        @param formatter: A callable which turns a test case result into a
            string.  The elements after the first of the tuples in
            C{results} will be passed as positional arguments to
            C{formatter}.

        @return: A C{list} of two-tuples.  The first element of each tuple
            is a unique string describing one result from at least one of
            the test cases in C{results}.  The second element is a list of
            the test cases which had that result.
        """
        groups = OrderedDict()
        for content in results:
            case = content[0]
            outcome = content[1:]
            key = formatter(*outcome)
            groups.setdefault(key, []).append(case)
        return groups.items()
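A hedged illustration of the contract described in the docstring; reporter stands in for whatever object carries this method:

results = [
    ("test_a", "TypeError"),
    ("test_b", "ValueError"),
    ("test_c", "TypeError"),
]
groups = reporter._groupResults(results, formatter=lambda outcome: outcome)
# -> [("TypeError", ["test_a", "test_c"]), ("ValueError", ["test_b"])]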
Example #4
def get_all_kplex(graph, k):
    triangles = find_triangles(graph)
    kplexes = OrderedDict()
    while len(triangles) > 0:
        t = triangles.pop()
        [kplex, peripheral] = get_kplex(graph, t, k)
        kplex.sort()
        peripheral.sort()
        kplex = tuple(kplex)
        kplexes.setdefault(kplex, set())
        kplexes[kplex] = kplexes[kplex].union(set(peripheral))

        # here we remove triangles containing a vertex in the kplex to prevent duplicates
        # not sure what we should do about vertex belonging to multiple kplexes?
        # new_triangles = []
        # for t in triangles:
        #    is_valid = True
        #    for v in kplex:
        #        if v in t:
        #            is_valid = False
        #            break
        #    if is_valid:
        #        new_triangles.append(t)
        #
        # triangles = new_triangles
    return kplexes
Example #5
class DownloaderSimulator(BaseDownloaderSimulator):
    def __init__(self, rate):
        self._requests_per_slot = rate
        self.slots = OrderedDict()
        super(DownloaderSimulator, self).__init__()

    def update(self, requests):
        for request in requests:
            hostname = urlparse(request.url).hostname or ""
            self.slots.setdefault(hostname, deque()).append(request)

    def download(self):
        output = []
        _trash_can = []
        for key, requests in six.iteritems(self.slots):
            for i in range(min(len(requests), self._requests_per_slot)):
                output.append(requests.popleft())
            if not requests:
                _trash_can.append(key)

        for key in _trash_can:
            del self.slots[key]
        return output

    def downloader_info(self):
        info = {"key_type": "domain", "overused_keys": []}
        for key, requests in six.iteritems(self.slots):
            if len(requests) > self._requests_per_slot:
                info["overused_keys"].append(key)
        return info

    def idle(self):
        return len(self.slots) == 0
Example #6
    def downstream_op_iter(idxs):
        """Key routine in recognizing refactor opportunities.

        idxs: a list of (int i, node n) pairs such that each node is some
            x[i].owner, for one base variable x.

        Returns: ((op, pos, itypes), nodes)
            Each nodes[i] is an op(..., x[i], ...) where the relevant slice of
            x shows up in position `pos` in the inputs to every node.
        """
        ops = OrderedDict()
        if range(len(idxs)) == list(zip(*idxs)[0]):
            for i0, n in idxs:
                for client_apply, pos_in_client in n.outputs[0].clients:
                    key = (client_apply.op, pos_in_client)
                    otypes = (tuple(i.type for i in client_apply.outputs),)
                    assert len(set(otypes)) == 1
                    key += (otypes[0],)
                    key += (tuple(tuple(i.broadcastable) for i in client_apply.inputs),)
                    ops.setdefault(key, []).append(client_apply)
            for key, ins in ops.items():
                # print key
                # print len(ins)
                # print len(idxs)
                # print ins
                if len(ins) == len(idxs):
                    yield (key, ins)
        else:
            # TODO work with this case
            pass
Example #7
    def group(self, context, v, _get=ExpressionModifiersBase._lookup_key):
        seq, key = v
        result = OrderedDict()
        for item in seq:
            k = _get(item, key)
            result.setdefault(k, []).append(item)
        return result
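A hedged usage sketch; ExpressionModifiersBase._lookup_key is assumed to behave like a per-item key lookup, so plain dicts work, and modifiers stands in for the instance carrying this method:

rows = [
    {"team": "red", "name": "ann"},
    {"team": "blue", "name": "bob"},
    {"team": "red", "name": "cid"},
]
grouped = modifiers.group(context=None, v=(rows, "team"))
# -> OrderedDict([("red", [rows[0], rows[2]]), ("blue", [rows[1]])])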
Example #8
    def dwell_times(self, labels):
        """Determine the dwell time for each consecutive labels sequence.
        Returns an ordered dict with labels as keys and list of duration time as
        values."""

        classes = np.unique(labels)
        counts = OrderedDict()
        for class_ in classes:
            counts.setdefault(class_, [])

        labels = labels.flatten()
        counter = 0
        for i, label in enumerate(labels):
            try:
                if labels[i] == labels[i + 1]:
                    counter += 1
                else:
                    counts[label].append(counter)
                    counter = 1  # at least one frame
            except IndexError:
                pass  # no index i+1

        for key, value in counts.iteritems():
            counts[key] = np.array(value) * self.stepwidth

        return counts
Example #9
def tokenize_proteins(data, msg="Processing proteins"):
    """Distribute all poses into either decoys list or actives OrderedDict.
    Poses placed into the actives OrderedDict are further organized into
    sublists for each ligand.

    args:
        @data list of string lines containing pose data
        @msg string message to display in progress bar
    returns:
        @actives OrderedDict of all active poses gathered from data
        @decoys list of all decoy poses gathered from data
    """

    actives = OrderedDict()
    decoys = list()
    bar = Bar(msg, max=len(data))

    for i, line in enumerate(data):
        bar.next()
        pose = posedict(line)  # Token -> List
        if pose["label"] == 1:  # Pose -> Decoys
            pose["id"] = pose["ligand"] + "-" + str(i)
            actives.setdefault(pose["ligand"], []).append(pose)
        else:  # Pose -> Actives
            decoys.append(pose)
    bar.finish()
    print ""

    return actives, decoys
Example #10
    def ToString(self, prefix=""):
        """
        Returns a string version of the profile, with all values properly tabulated.

        :rtype: :func:`string`

        :param prefix: string to be prepended to every line of the returned string.
        """
        retstr = ""
        try:
            from tabulate import tabulate

            newDict = OrderedDict()
            times = []
            for key, values in self.entries.iteritems():
                for val in values:
                    if val[0] not in times:
                        times.append(val[0])
            newDict["time(s)"] = sorted(times)
            for key, values in self.entries.iteritems():
                for t in times:
                    newDict.setdefault(key, []).append(
                        float(utils.get_value_at_time(values, t, interpolate=key in self.interpolated_profiles))
                    )
            retstr = tabulate(newDict, headers="keys", floatfmt=".1f")
            r = retstr
            retstr = ""
            for line in r.split("\n"):
                retstr += "{}{}\n".format(prefix, line)
        except ImportError:
            print >>sys.stderr, "Tabulate module should be installed for printing profiles."
        return retstr
Example #11
    def _init_root_dic(self):
        "initialize the root dic for GLUE and INT"
        dic = OrderedDict()
        new_dic = QT_GLUE_INT_OPERATION_DIC.copy()
        for entry in new_dic:
            dic.setdefault(entry, new_dic[entry])
        self.root_dic = dic
Example #12
    def get_archive_year_list(self):
        time_list = self.published().datetimes("modified", "year", order="DESC")
        dicts = OrderedDict()
        for time in time_list:
            articles = self.published().filter(modified__year=time.year)
            dicts.setdefault(time, articles)
        return dicts
Example #13
def print_queues():
    """
    Print each queue with waiting or delayed jobs, by priority
    """
    queues = OrderedDict()
    for q in Queue.collection().sort(by="name", alpha=True).instances():
        waiting = q.waiting.llen()
        delayed = q.delayed.zcard()
        if waiting + delayed == 0:
            continue
        name, priority = q.hmget("name", "priority")
        queues.setdefault(name, []).append({"priority": int(priority), "waiting": waiting, "delayed": delayed})

    for name in queues:
        sub_queues = sorted(queues[name], key=itemgetter("priority"), reverse=True)

        total_waiting = sum([q["waiting"] for q in sub_queues])
        total_delayed = sum([q["delayed"] for q in sub_queues])

        if len(sub_queues) == 1:
            priority_part = sub_queues[0]["priority"]
        else:
            priority_part = "----"

        print("%30s  %4s  %4d  %4d" % (name, priority_part, total_waiting, total_delayed))

        if len(sub_queues) > 1:
            for i, q in enumerate(sub_queues):
                print("%30s  %4d  %4d  %4d" % (" ", q["priority"], q["waiting"], q["delayed"]))
Example #14
    def get_months_active(self):
        """
        Creates an OrderedDict of the format:
        {
            ...
            2010: [
                first_day_of_month_date,
                ...
            ],
        }
        """
        current_month = datetime.datetime.today().month
        # Add the current month to the key so the cache refreshes automatically
        minmax_dict_key = "minmax_dict_%s_%s" % (self.id, current_month)
        minmax_dict = cache.get(minmax_dict_key, None)
        if minmax_dict is None:
            minmax_dict = self.log_set.all().aggregate(last_log=Max("timestamp"), first_log=Min("timestamp"))
            if not minmax_dict["first_log"]:
                return OrderedDict()
            # cache for 10 days
            cache.set(minmax_dict_key, minmax_dict, 864000)
        first_log = minmax_dict["first_log"].date()
        last_log = minmax_dict["last_log"].date()
        last_log = datetime.date(last_log.year, last_log.month, 1)
        current = datetime.date(first_log.year, first_log.month, 1)
        months_active = OrderedDict()
        while current <= last_log:
            months_active.setdefault(current.year, []).append(current)
            if current.month == 12:
                current = datetime.date(current.year + 1, 1, 1)
            else:
                current = datetime.date(current.year, current.month + 1, 1)
        return months_active
Example #15
def count_overlapping_motifs(sites, filename, cutoff=0.0):
    """
    Takes the sitecounts for a motif (a dictionary produced by the load_sitecount() function)
    and the filename of a MotEvo output file, then counts motif pairs that overlap each other.
    :param sites: dictionary of sites for a given motif
    :param filename: MotEvo output filename
    :param overlap_limit: an integer that indicates how far the two sites need to be from each other
    :param cutoff: minimum cutoff over the posterior
    :param proxy: optional dictionary that contains as keys the IDs of regions of interest
    :return: a dictionary that counts double appearances of motifs, where the overlapping sites are filtered
    """
    double_sitecounts = OrderedDict()
    for region in sites.keys():
        double_sitecounts.setdefault(region, 0)
    with open(filename, "r") as inf:
        for rec in csv.reader(inf, delimiter="\t"):
            region_name = rec[3].split(";")[-1]
            if not (region_name in sites):
                continue
            post = float(rec[4])
            if post > cutoff:
                start = int(rec[1])
                end = int(rec[2])
                for a_site in sites[region_name]:
                    if start >= a_site["start"]:
                        if (a_site["end"] - start) > 0:
                            double_sitecounts[region_name] += 1
                    else:
                        if (end - a_site["start"]) > 0:
                            double_sitecounts[region_name] += 1
    return double_sitecounts
Example #16
def _header_to_section(header, resolution):
    """
    converts row names of the form 'chr12\t1000-2000' into sections suitable
    for creating HiC_data objects. Also builds the per-chromosome bin counts from the reads.
    """
    chromosomes = None
    sections = {}
    if isinstance(header, list) and isinstance(header[0], tuple) and len(header[0]) > 1:
        chromosomes = OrderedDict()
        for i, h in enumerate(header):
            if "-" in h[1]:
                a, b = map(int, h[1].split("-"))
                if resolution == 1:
                    resolution = abs(b - a)
                elif resolution != abs(b - a):
                    raise Exception("ERROR: found different resolution, " + "check headers")
            else:
                a = int(h[1])
                if resolution == 1 and i:
                    resolution = abs(a - b)
                elif resolution == 1:
                    b = a
            sections[(h[0], a / resolution)] = i
            chromosomes.setdefault(h[0], 0)
            chromosomes[h[0]] += 1
    return chromosomes, sections, resolution
Example #17
    def get_updates(variables):
        # this is fugly because we must get the batch stats from the
        # graph so we get the ones that are *actually being used in
        # the computation* after graph transforms have been applied
        updates = []
        variables = graph.deep_ancestors(variables)
        for stat, role in BatchNormalization.roles.items():
            from blocks.roles import has_roles

            batch_stats = [var for var in variables if has_roles(var, [role])]
            batch_stats = util.dedup(batch_stats, equal=util.equal_computations)

            batch_stats_by_brick = OrderedDict()
            for batch_stat in batch_stats:
                brick = batch_stat.tag.batch_normalization_brick
                population_stat = brick.population_stats[stat]
                batch_stats_by_brick.setdefault(brick, []).append(batch_stat)

            for brick, batch_stats in batch_stats_by_brick.items():
                population_stat = brick.population_stats[stat]
                if len(batch_stats) > 1:
                    # makes sense for recurrent structures
                    logger.warning(
                        "averaging multiple population statistic estimates to update %s: %s"
                        % (util.get_path(population_stat), batch_stats)
                    )
                batch_stat = T.stack(batch_stats).mean(axis=0)
                updates.append((population_stat, (1 - brick.alpha) * population_stat + brick.alpha * batch_stat))
        return updates
Example #18
    def _blockData(self, data_d):

        blocks = OrderedDict({})
        coverage = {}

        for field in self.blocker.tfidf_fields:
            self.blocker.tfIdfBlock(((record_id, record[field]) for record_id, record in data_d.iteritems()), field)

        for block_key, record_id in self.blocker(data_d.iteritems()):
            blocks.setdefault(block_key, []).append((record_id, data_d[record_id]))

        # Redundant-free Comparisons from Kolb et al, "Dedoop:
        # Efficient Deduplication with Hadoop"
        # http://dbs.uni-leipzig.de/file/Dedoop.pdf
        for block_id, (block, records) in enumerate(blocks.iteritems()):
            for record_id, record in records:
                coverage.setdefault(record_id, []).append(block_id)

        for block_id, (block_key, records) in enumerate(blocks.iteritems()):
            tuple_records = []
            for record_id, record in records:
                smaller_ids = set([covered_id for covered_id in coverage[record_id] if covered_id < block_id])
                tuple_records.append((record_id, record, smaller_ids))

            yield tuple_records
Example #19
    def forwards(self, orm):
        # make sure all users have an email
        for user in orm["users.User"].objects.filter(email=""):
            user.email = user.username
            user.save()

        emails = OrderedDict()
        for userid, email in orm["users.User"].objects.values_list("id", "email"):
            emails.setdefault(email, []).append(userid)

        bad = set()
        for email, userids in emails.iteritems():
            if len(userids) > 1:
                logger.error(
                    'E-mail address "%s" belongs to multiple users '
                    "with IDs %s!" % (email, ", ".join(map(str, userids)))
                )
                bad.add(email)

        for email, userids in emails.iteritems():
            for userid in userids[1:]:
                updated = "%s_%s" % (userid, email)
                u = orm["users.User"].objects.filter(id=userid)
                u.update(email=updated)
                logger.warning("Renamed %s -> %s" % (email, updated))

        if bad:
            logger.warning("Some usernames were changed")
Example #20
    def _blockData(self, data_1, data_2):

        blocks = OrderedDict({})
        coverage = {}

        for field in self.blocker.tfidf_fields:
            fields_1 = ((record_id, record[field]) for record_id, record in data_1.iteritems())
            fields_2 = ((record_id, record[field]) for record_id, record in data_2.iteritems())

            self.blocker.tfIdfBlock(fields_1, fields_2, field)

        for block_key, record_id in self.blocker(data_1.iteritems()):
            blocks.setdefault(block_key, ([], []))[0].append((record_id, data_1[record_id]))

        for block_key, record_id in self.blocker(data_2.iteritems()):
            if block_key in blocks:
                blocks[block_key][1].append((record_id, data_2[record_id]))

        for block_id, (block, sources) in enumerate(blocks.iteritems()):
            for source in sources:
                for record_id, record in source:
                    coverage.setdefault(record_id, []).append(block_id)

        for block_id, (block_key, sources) in enumerate(blocks.iteritems()):
            tuple_block = []
            for source in sources:
                tuple_source = []
                for record_id, record in source:
                    smaller_ids = set([covered_id for covered_id in coverage[record_id] if covered_id < block_id])
                    tuple_source.append((record_id, record, smaller_ids))
                tuple_block.append(tuple_source)

            yield tuple_block
Example #21
    def _update_tiered_policy(self, ep_id):
        """
        Sends an updated list of tiered policy to an endpoint.

        Recalculates the list.
        :param ep_id: ID of the endpoint to send an update to.
        """
        _log.debug("Updating policies for %s from %s", ep_id, self.pol_ids_by_ep_id)
        # Order the profiles by tier and profile order, using the name of the
        # tier and profile as a tie-breaker if the orders are the same.
        profiles = []
        for pol_id in self.pol_ids_by_ep_id.iter_values(ep_id):
            try:
                tier_order = self.tier_orders[pol_id.tier]
            except KeyError:
                _log.warn("Ignoring profile %s because its tier metadata is missing.", pol_id)
                continue
            profile_order = self.profile_orders[pol_id]
            profiles.append((tier_order, pol_id.tier, profile_order, pol_id.policy_id, pol_id))
        profiles.sort()
        # Convert to an ordered dict from tier to list of profiles.
        pols_by_tier = OrderedDict()
        for _, tier, _, _, pol_id in profiles:
            pols_by_tier.setdefault(tier, []).append(pol_id)

        endpoint = self.objects_by_id[ep_id]
        endpoint.on_tiered_policy_update(pols_by_tier, async=True)
Example #22
def joincsv(csvjoin1, csvjoin2):

    import csv
    from collections import OrderedDict

    with open(csvjoin2, "rb") as f:
        r = csv.reader(f)
        header2 = r.next()
        header2 = header2[1:]
        dict2 = {row[0]: row[1:] for row in r}

    with open(csvjoin1, "rb") as f:
        r = csv.reader(f)
        header1 = r.next()
        dict1 = OrderedDict((row[0], row[1:]) for row in r)

    header = header1 + header2

    result = OrderedDict()
    for d in (dict1, dict2):
        for key, value in d.iteritems():
            result.setdefault(key, []).extend(value)

    with open("master_data_transform_final.csv", "wb") as f:
        w = csv.writer(f)
        w.writerow(header)
        for key, value in result.iteritems():
            w.writerow([key] + value)
Example #23
def makeSitecount(infile, outfile=None, cutoff=0.0, proxy=None):
    """it takes the name of a MotEvo file and returns a dictionary with
    that contains the sum of posterior for each region
    """
    sitecounts = OrderedDict()  # used to remember the insertion order
    if not proxy:  # if the proxy BED file is not provided
        with open(infile, "r") as inf:
            for rec in csv.reader(inf, delimiter="\t"):
                region_name = rec[3].split(";")[-1]
                sitecounts.setdefault(region_name, 0.0)
                post = float(rec[4])
                if post > cutoff:
                    sitecounts[region_name] += post
    else:
        regions_of_interest = dict([(l.split()[4], 0) for l in open(proxy)])  # by default it's assumed that the
        # 5th column contains the region IDs
        with open(infile, "r") as inf:
            for rec in csv.reader(inf, delimiter="\t"):
                region_name = rec[3].split(";")[-1]
                if not (region_name in regions_of_interest):
                    continue
                sitecounts.setdefault(region_name, 0.0)
                post = float(rec[4])
                if post > cutoff:
                    sitecounts[region_name] += post
    return sitecounts
Example #24
def token_map(stream, keyfunc=lambda op: op.token, valuefunc=lambda op: op.token):
    """
    Apply all token operations in order and construct a mapping.
    """
    present = {}
    tokens = OrderedDict()

    for operation in stream:
        key = keyfunc(operation)

        if isinstance(operation, AddTokenOp):
            if present.get(key, False):
                raise DuplicateTokenError(key, operation.token)
            else:
                present[key] = True

            tokens[key] = valuefunc(operation)
        elif isinstance(operation, SetDefaultTokenOp):
            tokens.setdefault(key, valuefunc(operation))
        elif isinstance(operation, RemoveTokenOp):
            try:
                del tokens[key]
            except KeyError:
                raise NoSuchTokenError(key, operation.token)

            present[key] = False

    return tokens
Example #25
def parse_repos():
    lines = read_file_contents_as_lines("repos")
    if not lines:
        sys.exit("error with repos file")

    data = OrderedDict()
    section = ""
    for i, l in enumerate(lines):
        if not l:
            continue

        if l[0] == "[":
            section = l.translate(None, "[]")
        else:
            if l[0] == "#":
                continue

            if not section:
                sys.exit("Error with repos file, needs to start with a section!")

            parts = l.split()
            if len(parts) != 2:
                sys.exit("Bad line in repos file:\n{}".format(l))
            repo, branch = l.split()

            data.setdefault(section, list()).append((repo, branch))
    return data
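The repos file format is implied by the parser above; a hypothetical file it would accept (section and repository names are made up), shown here as comments:

# [tools]
# # comment lines are skipped
# git@example.com:org/builder.git   master
#
# [services]
# git@example.com:org/api.git       develop
#
# parse_repos() would then return
# OrderedDict([("tools", [("git@example.com:org/builder.git", "master")]),
#              ("services", [("git@example.com:org/api.git", "develop")])])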
Example #26
    def map(self, coordinates, filelist, strand=True, col=5):
        # read coordinates
        mapto = OrderedDict()
        with open(coordinates) as coor:
            for lin in coor:
                line_split = lin.split("\t")
                if strand:
                    mapto.setdefault(
                        line_split[0] + line_split[1] + line_split[2] + line_split[5].strip("\n"), []
                    ).append(lin.strip("\n"))
                else:
                    mapto.setdefault(line_split[0] + line_split[1] + line_split[2], []).append(lin.strip("\n"))

        for fname in filelist:
            run_mapto = deepcopy(mapto)
            with open(fname) as f:
                for lin in f:
                    line_split = lin.split("\t")
                    if strand:
                        cor = line_split[0] + line_split[1] + line_split[2] + line_split[5].strip("\n")
                    else:
                        cor = line_split[0] + line_split[1] + line_split[2]
                    run_mapto[cor].append(line_split[col - 1])
            with open(fname + "mapped", "w") as fout:
                for key in run_mapto:
                    if len(run_mapto[key]) == 1:
                        run_mapto[key].append("0")
                    fout.write("\t".join(run_mapto[key]) + "\n")
Example #27
def main():
    args = arguments()
    motif_sites = load_sitecounts(args.input_file, args.cutoff, args.proxyBED)
    if args.proxyBED:
        regions_of_interest = load_regions_of_interest(args.proxyBED)
    else:
        regions_of_interest = None

    if args.output_dir:
        outdir = args.output_dir
    else:
        outdir = ""

    double_sitecounts = OrderedDict()
    for motif in os.listdir(args.dirname):
        motevo_output_file = os.path.join(args.dirname, motif)
        double_sitecounts.setdefault(motif, 0)
        double_sitecounts[motif], dist = count_double_motifs(
            motif_sites, motevo_output_file, args.overlap, args.cutoff, regions_of_interest
        )

        fname = os.path.join(outdir, "%s.dist" % motif)
        with open(fname, "w") as outf:
            for i, d in enumerate(dist):
                outf.write("\t".join([str(i), str(d) + "\n"]))

    with open(os.path.join(outdir, os.path.basename(args.input_file)), "w") as outf:
        outf.write(os.path.basename(args.input_file) + "\n")
        for motif, count in double_sitecounts.items():
            outf.write("\t".join([motif, str(count) + "\n"]))
    return 0
Example #28
def extract_extra_ingredients(nodes, is_section_header):
    section = None
    sections = OrderedDict()

    for node in nodes:
        text = node.extract() if isinstance(node, XPathSelector) else node
        text = html_to_text(text).strip()

        if not text:
            continue

        if is_section_header(node):
            section = text
            continue

        sections.setdefault(section, []).append(text)

    if None in sections:
        ingredients = sections.pop(None)
    elif sections:
        ingredients = sections.pop(sections.keys()[-1])
    else:
        ingredients = []

    extra_ingredients = [x for y in sections.values() for x in y]

    return (ingredients, extra_ingredients)
Example #29
class Args(object):
    def __init__(self, *args, **kwargs):
        self._items = OrderedDict()
        for arg in args:
            k, _, v = arg.partition("=")
            k = k.lstrip("-")
            if not kwargs.get("flatten"):
                self._items.setdefault(k, []).append(v)
            else:
                self._items[k] = v

    def get(self, k, default=None):
        return self._items.get(k, default)

    def items(self):
        return self._items.items()

    def __getattr__(self, k, default=None):
        return self._items.get(k, default)

    def __contains__(self, k):
        return k in self._items

    def __getitem__(self, k):
        return self._items[k]
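A brief usage sketch of the class above (the command-line strings are made up):

args = Args("--env=dev", "--tag=v1", "--tag=v2")
print(args.get("tag"))  # ['v1', 'v2'] -- repeated flags accumulate via setdefault
print("env" in args)    # True
flat = Args("--env=dev", flatten=True)
print(flat["env"])      # 'dev'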
Example #30
def changedp(coins, change):
    # zero-filled DP table with dimensions (len(coins) + 1) x (change + 1)
    minCoins = [[0] * (change + 1) for c in range(len(coins) + 1)]
    coinsUsed = [0] * (change + 1)
    coinList = OrderedDict()
    for c in coins:
        coinList.setdefault(c, 0)

    # preset each value of the first row (no coins available) as inf
    for amount in range(1, change + 1):
        minCoins[0][amount] = float("inf")

    # iterate through coins for each amount
    for c in range(1, len(coins) + 1):
        for amount in range(1, change + 1):
            # check if the coin denomination fits into the amount
            if coins[c - 1] <= amount:
                low = minCoins[c][amount - coins[c - 1]]
                if minCoins[c - 1][amount] <= 1 + low:
                    minCoins[c][amount] = minCoins[c - 1][amount]
                else:
                    minCoins[c][amount] = 1 + low
                    coinsUsed[amount] = coins[c - 1]
            else:
                # coin too large for this amount: carry over the row above
                minCoins[c][amount] = minCoins[c - 1][amount]
    finalList = []
    remainder = change
    # move backwards through array to get used coins for finalList
    while remainder > 0:
        coinList[coinsUsed[remainder]] = coinList[coinsUsed[remainder]] + 1
        finalList.append(coinsUsed[remainder])
        remainder = remainder - coinsUsed[remainder]
        # end while
    return minCoins[len(coins)][change], coinList.values()
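With the loop structure fixed as above, a quick hedged sanity check using made-up US-style denominations; the counts follow from the DP table the routine builds:

mincount, counts = changedp([1, 5, 10, 25], 63)
print(mincount)  # 6  (25 + 25 + 10 + 1 + 1 + 1)
print(counts)    # [3, 0, 1, 2] -- uses of 1, 5, 10 and 25 respectively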