Example #1
import itertools

def trim_list(lst):
    """Trim falsy elements from both ends of a list."""
    # drop leading falsy elements, then flip and do the same for the tail
    ltrimmed = list(itertools.dropwhile(lambda x: not x, lst))
    ltrimmed.reverse()
    trimmed = list(itertools.dropwhile(lambda x: not x, ltrimmed))
    trimmed.reverse()
    return trimmed
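Trimming only touches the ends; interior falsy values survive. A quick check (hypothetical values):

print(trim_list([0, None, 1, 0, 2, '', 0]))   # -> [1, 0, 2]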
Example #2
    def __init__(self, resource=None, fields=None, sheet=0,
                 encoding=None, skip_rows=0, has_header=True,
                 stop_empty_line=False):
        """Creates a XLSX spreadsheet data source stream.

        Attributes:

        * fields: `bubbles.metadata.FieldList` to use instead of auto-detection
        * resource: file name, URL or file-like object
        * sheet: sheet index number (as int) or sheet name
        * has_header: flag determining whether first line contains header or
          not. ``True`` by default.
        * stop_empty_line: flag to stop iterating over rows at the first
          empty line encountered. XLSX files can contain millions of rows,
          so without this flag iteration might take very long, especially
          if all the lines past a certain point are empty
        """
        if isinstance(resource, openpyxl.Workbook):
            self.workbook = resource
        else:
            self.workbook = _load_workbook(resource)

        if isinstance(sheet, int):
            self.sheet = self.workbook.worksheets[sheet]
        elif isinstance(sheet, str):
            self.sheet = self.workbook[sheet]
        else:
            raise ArgumentError('sheet has to be a string or an integer')

        if has_header:
            self.first_row = skip_rows + 1
        else:
            self.first_row = skip_rows

        self.stop_empty_line = stop_empty_line

        if fields:
            self.fields = fields
        else:
            rows = enumerate(self.sheet.rows)
            first_row = next(dropwhile(lambda x: x[0] < self.first_row,
                                       rows))[1]
            if has_header:
                header_rows = enumerate(self.sheet.rows)
                header_row = next(dropwhile(lambda x: x[0] < (self.first_row - 1),
                                            header_rows))[1]
                # fetch names, replace line breaks by spaces in case of
                # manual line wrapping
                names = [' '.join(str(c.value).split()) for c in header_row]
            else:
                names = ['col%d' % i for i in range(len(first_row))]

            self.fields = FieldList()
            for name, cell in zip(names, first_row):
                if cell.is_date:
                    storage_type = 'date'
                else:
                    storage_type = CELL_TYPES.get(cell.data_type, 'unknown')
                field = Field(name, storage_type=storage_type)
                self.fields.append(field)
Example #3
import itertools
import math
import re
import subprocess

def run_ogg_info(cmdpath, filename):
    """Run ogginfo.exe from `cmdpath` on `filename`.
    Returns an OggInfo object."""
    proc = subprocess.Popen([cmdpath, filename], stdout=subprocess.PIPE,
                            universal_newlines=True)
    data = list(proc.stdout)
    metadata = {"title": None, "artist": None, "album": None,
                "tracknumber": None, "length": None}
    # Find user comments
    userdata = itertools.dropwhile(
        lambda s: not s.startswith("User comments section follows"), data)
    for line in userdata:
        for st in ["title", "artist", "album", "tracknumber"]:
            m = re.search(r"{}\s*=\s*(.*)".format(st), line)
            if m is not None:
                metadata[st] = m.group(1)
    # Find stream information
    streamdata = itertools.dropwhile(
        lambda s: not s.startswith("Vorbis stream"), data)
    for line in streamdata:
        m = re.search(r"Playback length:\s*(.*)", line)
        if m is not None:
            # Expect format "{int}m:{float}s"
            t = re.search(r"(\d+)m:([\d\.]+)s", m.group(1))
            if t is None:
                raise Exception("Unknown time code: '{}'".format(m.group(1)))
            metadata["length"] = int(t.group(1)) * 60 + math.ceil(float(t.group(2)))
    # Return
    return OggInfo(metadata["title"], metadata["artist"], metadata["album"],
                   metadata["tracknumber"], metadata["length"])
Example #4
    def _FillDIMMs(self, lsCfgData):
        """Fills RAM modules information from 'lscfg -vp' output stored in lsCfgData list"""
        iterDIMMsData = it.dropwhile(lambda x: not RE_RAM_MODULE.match(x), lsCfgData)
        dDIMMs = {}
        self.iDIMMs = 0
        try:
            while True:
                sHWLoc, sName, sSN, sPN, iSize = ('', '', '', '', 0)   # empty variables
                iterDIMMsData = it.dropwhile(lambda x: not RE_RAM_MODULE.match(x), iterDIMMsData)
                # we are at the first line of a DIMM's description. Let's parse it.
                # sL1 = next(iterDIMMsData).strip()
                next(iterDIMMsData)
                # oLog.debug('_FillDIMMs: 1st line is {}'.format(sL1))
                self.iDIMMs += 1
                sL = '--------'   # initialize loop variable
                while sL != '':
                    sL = next(iterDIMMsData).strip()
                    if sL[:22] == "Hardware Location Code":
                        sHWLoc = RE_DOTS.split(sL)[1]
                        sName = 'RAM Module {}'.format(sHWLoc.split('.')[-1])
                    elif sL[:13] == "Serial Number":
                        sSN = RE_DOTS.split(sL)[1]
                    elif sL[:11] == "Part Number":
                        sPN = RE_DOTS.split(sL)[1]
                    elif sL[:6] == "Size..":
                        iSize = int(RE_DOTS.split(sL)[1]) // 1024
                    else:
                        pass   # skip unknown lines
                # collect all the information to one data structure
                dDIMM_Dict = {'SN': sSN, 'PN': sPN, 'Loc': sHWLoc, 'Size': iSize}
                dDIMMs[sName] = dDIMM_Dict
                continue   # while true
        except StopIteration:
            # end of lscfg output, no more DIMMs
            pass

        # now the dDIMMs dictionary contains our information, but its keys
        # are not well suited for Zabbix item names: we need to shorten them
        # and remove the uniqueness that comes from using the box S/N in the
        # DIMM position. First, we need to arrange the modules by boxes
        dDimmsByBoxes = {}
        for sName, dValue in dDIMMs.items():
            sBoxName, sOther = sName.split('-', maxsplit=1)
            # if adding a first element, create a dictionary
            if dDimmsByBoxes.get(sBoxName, None) is None:
                dDimmsByBoxes[sBoxName] = {sOther: dValue}
            else:
                dDimmsByBoxes[sBoxName][sOther] = dValue
        # Now (hopefully) all DIMMs are grouped by a box. Just sort and number these boxes
        lBoxNames = list(dDimmsByBoxes.keys())
        lBoxNames.sort()        # <-- in place
        for iBoxNum in range(0, len(lBoxNames)):
            dInBox = dDimmsByBoxes[lBoxNames[iBoxNum]]
            for sOther, dValue in dInBox.items():
                sName = "Box{}-{}".format(iBoxNum + 1, sOther)
                oDIMM = IBM_DIMM_Module(sName, dValue['PN'], dValue['SN'], dValue['Loc'],
                                        dValue['Size'])
                # oLog.debug('DIMM object created: ' + str(oDIMM))
                self.lDIMMs.append(oDIMM)
        return
Example #5
def move_protocol_to_wiki(session, pad_name_addon=None):
    if pad_name_addon:
        pad_name = session.pad_name.format(pad_name_addon)
    else:
        pad_name = session.pad_name
    pad_lines = download_pad(session.pad_team, pad_name)
    logg.debug("downloaded pad for %s", pad_name)
    # discard the preamble (everything before the protocol marker)
    pad_it = dropwhile(lambda s: "= Protokoll ab hier =" not in s, pad_lines)
    next(pad_it)
    # discard empty lines
    pad_it = dropwhile(lambda s: not s.strip(), pad_it)
    header = next(pad_it)
    match = re.search(r"(\d+)\.(\d+)\.(\d\d+)", header)
    if not match:
        raise Exception("Mit dem Protokoll-Anfang stimmt was nicht, Datum konnte nicht erkannt werden: {}".format(header))
    day, month, year = [int(e) for e in match.groups()]
    if year < 100:
        year += 2000
    session_date = date(year=year, month=month, day=day)
    reversed_date = reverse_date_format(session_date)
    unquote_func = lambda s: s.replace("&gt;", ">").replace("&lt;", "<")
    edit_uri = "http://wiki.piratenpartei.de/wiki//index.php?title={}&action=edit"
    logg.debug("Header ist:\n%s, Protokoll-Datum %s", header, reversed_date)
    logg.info("Inhalt:" + "-" * 80)
    print(header + "".join(imap(unquote_func, pad_it)))
    logg.info("-" * 80)
    logg.info("Seiten-URI fürs Protokoll:")
    logg.info(edit_uri.format(session.wiki_protocol_uri.format(date=reversed_date)))
Example #6
def text_quote(message):
    # avoid importing a big module by using a simple heuristic to guess the
    # right encoding
    def decode(s, encodings=('ascii', 'utf8', 'latin1')):
        for encoding in encodings:
            try:
                return s.decode(encoding)
            except UnicodeDecodeError:
                pass
        return s.decode('ascii', 'ignore')
    lines = message.splitlines()
    # delete empty lines at beginning and end (some email clients insert these
    # outside of the pgp signed message...)
    if lines[0] == '' or lines[-1] == '':
        from itertools import dropwhile
        lines = list(dropwhile(lambda l: l == '', lines))
        lines = list(reversed(list(dropwhile(
            lambda l: l == '', reversed(lines)))))
    if len(lines) > 0 and lines[0] == '-----BEGIN PGP MESSAGE-----' \
            and lines[-1] == '-----END PGP MESSAGE-----':
        try:
            sigs, d = crypto.decrypt_verify(message.encode('utf-8'))
            message = decode(d)
        except errors.GPGProblem:
            pass
    elif len(lines) > 0 and lines[0] == '-----BEGIN PGP SIGNED MESSAGE-----' \
            and lines[-1] == '-----END PGP SIGNATURE-----':
        # gpgme does not seem to be able to extract the plain text part of
        # a signed message
        import gnupg
        gpg = gnupg.GPG()
        d = gpg.decrypt(message.encode('utf8'))
        message = d.data.decode('utf8')
    quote_prefix = settings.get('quote_prefix')
    return "\n".join([quote_prefix + line for line in message.splitlines()])
Example #7
def mpairs(seq1, seq2, key1, key2=None):
    """Generates a tuple of matching pairs
    key1 and key2 are functions (getters, sort of)

    seq1, seq2 must be sorted in ascending order before being passed here
        and also each key value(which is returned by key funcs) must be UNIQUE
        otherwise you will see unexpected results
    """
    key2 = key2 or key1

    seq1, seq2 = iter(seq1), iter(seq2)

    s1, s2 = next(seq1), next(seq2)
    k1, k2 = key1(s1), key2(s2)

    while True:
        try:
            if k1 == k2:
                yield (s1, s2)
                s1, s2 = next(seq1), next(seq2)
                k1, k2 = key1(s1), key2(s2)
            elif k1 < k2:
                s1 = next(dropwhile(lambda x: key1(x) < k2, seq1))
                k1 = key1(s1)
            else:
                s2 = next(dropwhile(lambda x: key2(x) < k1, seq2))
                k2 = key2(s2)

        except StopIteration:
            break
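A small usage sketch (assuming `from itertools import dropwhile` is in scope), matching two sorted integer sequences on their values:

a = [1, 3, 4, 8]
b = [2, 3, 5, 8, 9]
print(list(mpairs(a, b, key1=lambda x: x)))   # -> [(3, 3), (8, 8)]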
Example #8
def flow(iterable, pipes, session=Session()):
    """Flow data through a pipeline of transforms.

    Takes an iterable and a list of functions ("pipes") to pass it through. The
    output of each pipe serves as the input to the next. The final result is
    just another iterable.

    If the pipes are generators, ``flow`` will be entirely lazy.

    Empty values (``None``) are valid in the pipeline. None-pipes are always
    skipped; their only use is as destination markers for the items finalized
    above. Flow for all items, finalized or not, is resumed following the
    none-pipe.
    """
    if not any(pipes):
        for out_item in iterable:
            yield out_item
        return
    for in_item in iterable:
        if session.consume_finalized():
            remaining_pipes = list(dropwhile(lambda p: p is not None, pipes))
            for out_item in flow([in_item], remaining_pipes, session):
                yield out_item
        else:
            remaining_pipes = list(dropwhile(lambda p: p is None, pipes))
            output = remaining_pipes[0](in_item, session)
            for out_item in flow(output, remaining_pipes[1:], session):
                yield out_item
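A minimal driving sketch, with a hypothetical Session stub standing in for the module's real class (the stub must exist before flow is defined, since the default argument is evaluated at definition time):

class Session:
    # hypothetical stub; the real class comes from the surrounding module
    def consume_finalized(self):
        return False

def double(item, session):
    yield item * 2

def increment(item, session):
    yield item + 1

print(list(flow([1, 2, 3], [double, increment])))   # -> [3, 5, 7]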
Example #9
def plot_probability(k, sample_size=1000):
    N = np.arange(1, 200)
    p_pair = [multi_birthday_probability(n, 2, k) for n in N]
    p_pair_ian = [multi_birthday_probability_ian(n, 2, k) for n in N]
    p_pair_experimental = [multi_birthday_probability_experimental(n, 2, k, sample_size=sample_size) for n in N]
    p_triple = [multi_birthday_probability(n, 3, k) for n in N]
    p_triple_ian = [multi_birthday_probability_ian(n, 3, k) for n in N]
    p_triple_experimental = [multi_birthday_probability_experimental(n, 3, k, sample_size=sample_size) for n in N]
    # Find the smallest n such that p >= 0.5, assuming p is monotonically increasing with N.
    N_half_pair = next(it.dropwhile(lambda n_p: n_p[1] < 0.5, zip(N, p_pair)))[0]
    N_triple_pair = next(it.dropwhile(lambda n_p: n_p[1] < 0.5, zip(N, p_triple)))[0]
    P.clf()
    P.hold(True)
    P.plot(N, p_pair               , 'b-',
           N, p_pair_ian           , 'g-',
           N, p_pair_experimental  , 'b.',
           N, p_triple             , 'r-',
           N, p_triple_ian         , 'g-',
           N, p_triple_experimental, 'r.')
    P.legend(['m = 2, theoretical', 'm = 2, Ian', 'm = 2, %d samples' % (sample_size,),
              'm = 3, theoretical', 'm = 3, Ian', 'm = 3, %d samples' % (sample_size,)],
             loc='lower right')
    P.grid(True)
    P.xlabel('# People')
    P.ylabel('Probability')
    P.title('Probability of m people with the same birthday')
    y_limits = (-0.01, 1.01)
    P.plot([N_half_pair, N_half_pair], y_limits, 'k--')
    P.plot([N_triple_pair, N_triple_pair], y_limits, 'k--')
    P.ylim(y_limits)
    P.show()
Example #10
import itertools
from typing import Callable, Iterable, Iterator

def seq_range(seq: Iterable[int], ini: int, fin: int,
              key: Callable[..., int] = None) -> Iterator[int]:
    """Returns a generator over the elements x of seq such that
       ini <= x <= fin
       (seq is assumed to be an increasing sequence of numbers)."""
    if key:
        return itertools.dropwhile(lambda x: ini > key(x),
                                   itertools.takewhile(lambda x: key(x) <= fin, seq))
    return itertools.dropwhile(lambda x: ini > x,
                               itertools.takewhile(lambda x: x <= fin, seq))
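For example, slicing an increasing sequence by value rather than by index:

print(list(seq_range(range(10), 3, 7)))   # -> [3, 4, 5, 6, 7]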
Example #11
 def _FillPwrSupplies(self, lsCfgData):
     """ Fills power supplies list from output of 'lscfg -vp' saved in a list of strings """
     iterPSData = it.dropwhile(lambda x: not RE_PWRSUPPLY.match(x), lsCfgData)
     self.iPwrSupplies = 0
     try:
         while True:
             sPN = ''                # no P/N on non-local drives
             iterPSData = it.dropwhile(lambda x: not RE_PWRSUPPLY.match(x), iterPSData)
             # we are at the first line of a power supply's description. Let's parse it.
             # sL1 = next(iterPSData).strip()
             next(iterPSData)
             self.iPwrSupplies += 1
             # oLog.debug('_FillPwrSupply: 1st line is {}'.format(sL1))
             sName = 'Power Supply {}'.format(self.iPwrSupplies)
             sL = '--------'   # initialize loop variable
             while sL != '':        # empty line is end of PS record
                 sL = next(iterPSData).strip()
                 if sL[:22] == "Hardware Location Code":
                     sHWLoc = RE_DOTS.split(sL)[1]
                 elif sL[:13] == "Serial Number":
                     sSN = RE_DOTS.split(sL)[1]
                 elif sL[:11] == "Part Number":
                     sPN = RE_DOTS.split(sL)[1]
                 else:
                     pass   # skip unknown lines
             # create PwrSupply object
             self.lPwrSupplies.append(IBM_Power_Supply(sName, sPN, sSN, sHWLoc))
             continue   # while true
     except StopIteration:
         # end of lscfg output, no more Power Supplies
         pass
     return
Example #12
def print_orfs(seq,codon_table_ncbi):
    ''' Finds open reading frames in DNA string, including nested ORFs, and prints them, along with RNA and protein translations'''
    #stop_codons = [rna_to_dna(k) for k, v in codon_table.items() if v == '*']
    stop_codons = codon_table_ncbi.stop_codons
    #start_codons = [rna_to_dna(k) for k, v in codon_table.items() if v == 'M']
    start_codons = codon_table_ncbi.start_codons
    codon_table = codon_table_ncbi.forward_table
    orfs = []
    # if we care about positions:
    #frame1 = zip(itertools.count(),(''.join(k) for k in zip(seq[0::3],seq[1::3],seq[2::3])))
    #frame2 = zip(itertools.count(),(''.join(k) for k in zip(seq[1::3],seq[2::3],seq[3::3])))
    #frame3 = zip(itertools.count(),(''.join(k) for k in zip(seq[2::3],seq[3::3],seq[4::3])))
    
    def chunk3frames(frnum): 
        '''Split up DNA sequence string into triplets, offset by frame '''
        return (''.join(k) for k in zip(seq[0+frnum::3],seq[1+frnum::3],seq[2+frnum::3]))
    
    for frame in map(chunk3frames,range(3)):
        exhausted = False # Are there no more ORFs to find?
        passthrough = itertools.dropwhile(lambda l: l not in start_codons, frame)
        while exhausted is False:
            passthrough, process = itertools.tee(passthrough)
            result = itertools.takewhile(lambda l: l not in stop_codons, process) # this omits the stop codon
            new_orf = list(result)
            passthrough = itertools.dropwhile(lambda l: l not in start_codons, itertools.islice(passthrough,1,None))
            if len(new_orf) > 0:
                orfs.append(new_orf)
            else:
                exhausted = True
                
    return([''.join(orf) for orf in orfs])
Example #13
def stock_data_task(api_object):
    # checkpoint logic
    start_date = datetime.today()
    default_api = api_object.apis[0][0]
    checkpoint, _ = StockDataCheckpoint.objects.get_or_create(
        domain=api_object.domain,
        defaults={
            "api": default_api,
            "date": None,
            "limit": 1000,
            "offset": 0,
            "location": None,
            "start_date": start_date,
        },
    )

    if not checkpoint.api:
        checkpoint.api = default_api

    if not checkpoint.start_date:
        checkpoint.start_date = start_date
        checkpoint.save()

    if not api_object.all_stock_data:
        facilities = api_object.test_facilities
    else:
        facilities = api_object.get_ids()
    if checkpoint.location:
        external_id = api_object.get_last_processed_location(checkpoint)
        if external_id:
            facilities = list(itertools.dropwhile(lambda x: int(x) != int(external_id), facilities))
            process_facility_task(api_object, facilities[0], start_from=checkpoint.api)
            facilities = facilities[1:]

    if not checkpoint.date or checkpoint.location:
        # use subtasks only during initial migration
        facilities_chunked_list = chunked(facilities, 5)
        for chunk in facilities_chunked_list:
            api_object.process_data(process_facility_task, chunk)
    else:
        offset = checkpoint.offset
        for stock_api in itertools.dropwhile(lambda x: x.name != checkpoint.api, api_object.get_stock_apis_objects()):
            stock_api.add_date_filter(checkpoint.date, checkpoint.start_date)
            synchronization(
                stock_api,
                checkpoint,
                checkpoint.date,
                1000,
                offset,
                params={"domain": api_object.domain},
                domain=api_object.domain,
                atomic=True,
            )
            offset = 0

    checkpoint = StockDataCheckpoint.objects.get(domain=api_object.domain)
    save_stock_data_checkpoint(checkpoint, default_api, 1000, 0, checkpoint.start_date, None, False)
    checkpoint.start_date = None
    checkpoint.save()
Example #14
import itertools as it

def it_filter():
    # dropwhile returns an iterator that yields the input's elements
    # starting from the first one for which the predicate is false
    print(list(it.dropwhile(lambda x: x > 1, iter([0, 1, 2, -1]))))
    print(list(it.dropwhile(lambda x: x > 1, [0, 1, 2, -1])))

    # takewhile returns an iterator that yields elements while the
    # predicate is true, stopping at the first false one
    print(list(it.takewhile(lambda x: x > 1, iter([0, 1, 2, -1]))))
    print(list(it.takewhile(lambda x: x > 1, iter([2, 1, 2, -1]))))
Example #15
def get_bounding_elements(x, l, key=lambda elem: elem, sort=False):
    """
    Get the two elements of a list that bound a value
    """
    if sort:
        l = sorted(l, key=key)
    return (next(itertools.dropwhile(lambda elem: key(elem) > x, reversed(l)), None),
            next(itertools.dropwhile(lambda elem: key(elem) < x, l), None))
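For instance, with an ascending list (hypothetical values):

print(get_bounding_elements(5, [1, 3, 7, 9]))   # -> (3, 7)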
Example #16
import itertools

def parse_docstring(docstring):
    # materialize the lines: they are iterated more than once below
    lines = [x.strip() for x in docstring.split("\n")]
    # everything from the first blank line on is the definitions block
    definitions = list(itertools.dropwhile(lambda x: x, lines))
    # the summary is the first non-blank run inside that block
    summaries = itertools.dropwhile(lambda x: not x, definitions)
    summaries = itertools.takewhile(lambda x: x, summaries)
    summaries = list(summaries)
    # fall back to the whole docstring if no summary was found
    summaries = summaries if summaries else list(lines)
    return definitions, summaries
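A quick illustration of the split (hypothetical input):

doc = """First line of the summary.

Details start after the blank line."""
definitions, summaries = parse_docstring(doc)
print(definitions)   # -> ['', 'Details start after the blank line.']
print(summaries)     # -> ['Details start after the blank line.']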
Example #17
    def __init__(self, muon_track, muon_p, muon_losses, frame):
        # Create loss tuples
        self.losses = sorted([[loss.energy, (loss.pos - muon_p.pos)*muon_p.dir, int(loss.type)] for loss in muon_losses], key=lambda x: x[1])

        # Create checkpoints
        self.checkpoints = [(muon_p.energy, 0)]

        muon_pos_i = dataclasses.I3Position(muon_track.xi, muon_track.yi, muon_track.zi)
        self.checkpoints.append((muon_track.Ei, (muon_pos_i - muon_p.pos)*muon_p.dir))

        muon_pos_c = dataclasses.I3Position(muon_track.xc, muon_track.yc, muon_track.zc)
        self.checkpoints.append((muon_track.Ec, (muon_pos_c - muon_p.pos)*muon_p.dir))

        muon_pos_f = dataclasses.I3Position(muon_track.xf, muon_track.yf, muon_track.zf)
        self.checkpoints.append((muon_track.Ef, (muon_pos_f - muon_p.pos)*muon_p.dir))

        self.checkpoints.append((0, muon_p.length))

        # Assign valid checkpoints
        track_cps = self.checkpoints[1:-1]
        self.valid_checkpoints = [self.checkpoints[0]] + [cp for cp in track_cps if cp[0] > 0] + [self.checkpoints[-1]]
        self.valid_checkpoints = sorted(self.valid_checkpoints, key=lambda x: x[1])
        
        # Add loss sums to losses
        next_dist = 0
        total = 0
        for j in range(len(self.losses)):
            if self.losses[j][1] >= next_dist:
                next_dist = next(itertools.dropwhile(lambda cp: cp[1] <= self.losses[j][1], self.valid_checkpoints), (None, np.inf))[1]
                total = 0
            total += self.losses[j][0]
            self.losses[j] = tuple(self.losses[j] + [total])

        self.loss_rates = []
        self.loss_ranges = []
        for i in range(0, len(self.valid_checkpoints)-1):
            cp1 = self.valid_checkpoints[i]
            cp2 = self.valid_checkpoints[i+1]
            first_index = next(itertools.dropwhile(lambda l: l[1][1] <= cp1[1], enumerate(self.losses)), [-1])[0]
            last_index = len(self.losses) - 1 - next(itertools.dropwhile(lambda l: l[1][1] >= cp2[1], enumerate(reversed(self.losses))), [0])[0]

            if last_index < 0:
                total_stochastic_loss = 0
            else:
                total_stochastic_loss = self.losses[last_index][3]
            try:
                loss_rate = (cp1[0] - cp2[0] - total_stochastic_loss) / (cp2[1] - cp1[1])
            except Exception:
                print(self.checkpoints)
                print(self.valid_checkpoints)
                print('i: %d' % i)
                print(muon_p)
                print(frame['I3EventHeader'])
                print(frame['I3EventHeader'].run_id)
                print(frame['I3EventHeader'].event_id)
                raise
            self.loss_rates.append(loss_rate)
            self.loss_ranges.append((first_index, last_index+1))
Example #18
 def _load(self):
     # OFX headers precede the first tag; skip them before parsing
     is_header = lambda line: not line.startswith('<')
     # first pass: parse account information only
     parser = OFXParser(self, accounts_only=True)
     for line in dropwhile(is_header, self.lines):
         parser.feed(line)
     parser.close()
     # second pass: full parse, now that the accounts are known
     parser = OFXParser(self)
     for line in dropwhile(is_header, self.lines):
         parser.feed(line)
     parser.close()
Example #19
def regex_chunk(lines, regex):
    # type: (List[str], Pattern[str]) -> List[List[str]]
    lst = list(itertools.dropwhile(lambda x: not regex.match(x), lines))
    arr = []
    while lst:
        ret = [lst[0]]+list(itertools.takewhile(lambda x: not regex.match(x),
                                                lst[1:]))
        arr.append(ret)
        lst = list(itertools.dropwhile(lambda x: not regex.match(x),
                                       lst[1:]))
    return arr
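For example, chunking a config-style listing at each section header (hypothetical input):

import re
section = re.compile(r'^\[')
lines = ['junk', '[a]', 'x=1', '[b]', 'y=2']
print(regex_chunk(lines, section))
# -> [['[a]', 'x=1'], ['[b]', 'y=2']]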
Example #20
def strip_rows(lines):
    '''
    returns an iterator of lines with leading and trailing blank cells
    removed
    '''
    isblank = lambda s: s == ''
    for line in lines:
        leading_dropped = list(itertools.dropwhile(isblank, line))
        rev_line = list(itertools.dropwhile(isblank,
                                            reversed(leading_dropped)))
        yield list(reversed(rev_line))
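Applied to rows of cells (e.g. from a spreadsheet), only interior blanks survive (assuming itertools is imported):

rows = [['', 'a', '', 'b', ''], ['', '', '']]
print(list(strip_rows(rows)))   # -> [['a', '', 'b'], []]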
Example #21
def trianglepentagonhexagons():
	triang_iter = triangles()
	pentag_iter = pentagons()
	for hexag in hexagons():
		for pentag in dropwhile(lambda p: p < hexag, pentag_iter):
			if pentag == hexag:
				for triang in dropwhile(lambda t: t < hexag,triang_iter):
					if triang == hexag:
						yield hexag
					break
			break
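The triangles, pentagons and hexagons helpers are not shown in this excerpt; minimal sketches of the figurate-number generators it relies on might be:

from itertools import count, dropwhile

def triangles():
    # triangular numbers: T(n) = n(n+1)/2
    return (n * (n + 1) // 2 for n in count(1))

def pentagons():
    # pentagonal numbers: P(n) = n(3n-1)/2
    return (n * (3 * n - 1) // 2 for n in count(1))

def hexagons():
    # hexagonal numbers: H(n) = n(2n-1)
    return (n * (2 * n - 1) for n in count(1))

Since every hexagonal number is also triangular (H(n) = T(2n-1)), the generator's early yields are 1 and 40755; Project Euler 45 then asks for the next one.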
Example #22
 def remove_blocks(name, iterable):
     start, end = BLOCK_START % name, BLOCK_END % name
     it = iter(iterable)
     while True:
         line = next(it)
         while line != start:
             yield line
             line = next(it)
         it = tail(itertools.dropwhile(not_eq(end), it))
         if remove_empty_next:
             it = itertools.dropwhile(lambda el: not el.strip(), it)
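remove_blocks leans on three names defined elsewhere: not_eq, tail and the flag remove_empty_next (a free variable of the enclosing scope). Plausible stand-ins for the two helpers, as an assumption; note also that from Python 3.7 the bare next() calls raise RuntimeError instead of silently ending the generator when the input runs out (PEP 479):

import itertools

def not_eq(value):
    # predicate factory: true while the element differs from `value`
    return lambda el: el != value

def tail(iterator):
    # skip the first element (here: the block-end marker itself)
    return itertools.islice(iterator, 1, None)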
Example #23
def accuDetail(correct, total, legend, ylim=100, threshold=1000):
    # index of the first bucket whose cumulative count reaches the threshold
    get_index = lambda cs: next(dropwhile(lambda ic: sum(ic[1]) < threshold, enumerate(cs)))[0]
    indices = [get_index(cs) for cs in correct]
    print(indices)
    
    for c,t,l,i in zip(correct, total, legend, indices) :
        print("%s:\t%i of %i\t(%.2f%%)" % (l, sum(c[i]), sum(t[i]), 100*sum(c[i])/float(sum(t[i]))))
        
    get_accu = lambda ts,cs,index : [1 if t == 0 else c/float(t) for t,c in zip(ts[index], cs[index])]
    accu = [get_accu(ts, cs, index) for ts,cs,index in zip(total, correct, indices)]
    accuHist(accu, legend, ylim=ylim)
Example #24
def strip_rows(lines):
    """
    returns an iterator of lines with leading and trailing blank (empty or
    which contain only space) cells.
    """
    isblank = lambda s: s == '' or s.isspace()
    for line in lines:
        leading_dropped = list(itertools.dropwhile(isblank, line))
        rev_line = list(itertools.dropwhile(isblank,
                                            reversed(leading_dropped)))
        yield list(reversed(rev_line))
Example #25
def stencil(**kwargs):
    """
    Applying genotype calls to multi-way alignment incidence matrix

    :param alnfile: alignment incidence file (h5),
    :param gtypefile: genotype calls by GBRS (tsv),
    :param grpfile: gene ID to isoform ID mapping info (tsv)
    :return: genotyped version of alignment incidence file (h5)
    """
    alnfile = kwargs.get('alnfile')
    gtypefile = kwargs.get('gtypefile')
    grpfile = kwargs.get('grpfile')
    if grpfile is None:
        grpfile2chk = os.path.join(DATA_DIR, 'ref.gene2transcripts.tsv')
        if os.path.exists(grpfile2chk):
            grpfile = grpfile2chk
        else:
            print('[gbrs::stencil] A group file is *not* given. Genotype will be stenciled as is.', file=sys.stderr)

    # Load alignment incidence matrix ('alnfile' is assumed to be in multiway transcriptome)
    alnmat = emase.AlignmentPropertyMatrix(h5file=alnfile, grpfile=grpfile)

    # Load genotype calls
    hid = dict(zip(alnmat.hname, np.arange(alnmat.num_haplotypes)))
    gid = dict(zip(alnmat.gname, np.arange(len(alnmat.gname))))
    gtmask = np.zeros((alnmat.num_haplotypes, alnmat.num_loci))
    gtcall_g = dict.fromkeys(alnmat.gname)
    with open(gtypefile) as fh:
        if grpfile is not None:
            gtcall_t = dict.fromkeys(alnmat.lname)
            for curline in dropwhile(is_comment, fh):
                item = curline.rstrip().split("\t")
                g, gt = item[:2]
                gtcall_g[g] = gt
                hid2set = np.array([hid[c] for c in gt])
                tid2set = np.array(alnmat.groups[gid[g]])
                gtmask[np.meshgrid(hid2set, tid2set)] = 1.0
                for t in tid2set:
                    gtcall_t[alnmat.lname[t]] = gt
        else:
            for curline in dropwhile(is_comment, fh):
                item = curline.rstrip().split("\t")
                g, gt = item[:2]
                gtcall_g[g] = gt
                hid2set = np.array([hid[c] for c in gt])
                gtmask[np.meshgrid(hid2set, gid[g])] = 1.0

    alnmat.multiply(gtmask, axis=2)
    for h in range(alnmat.num_haplotypes):
        alnmat.data[h].eliminate_zeros()
    outfile = kwargs.get('outfile')
    if outfile is None:
        outfile = 'gbrs.stenciled.' + os.path.basename(alnfile)
    alnmat.save(h5file=outfile)
Example #26
def normalise_text(text):
    """
    Removes leading and trailing whitespace from each line of text.
    Removes leading and trailing blank lines from text.
    """
    stripped_lines = [line.strip() for line in text.split("\n")]
    # remove leading and trailing empty lines
    stripped_head = list(itertools.dropwhile(lambda s: not s, stripped_lines))
    stripped_tail = itertools.dropwhile(lambda s: not s, reversed(stripped_head))
    return "\n".join(reversed(list(stripped_tail)))
Example #27
def check_file_neon(fn, source):
    lines = enumerate(source.split("\n"), 1)
    for incomment, doc in itertools.groupby(lines, lambda line: len(line[1]) >= 2 and line[1][1] == "|"):
        if incomment:
            full = list(doc)
            doc = itertools.dropwhile(lambda x: "Example:" not in x, [x[1] for x in full])
            try:
                next(doc)
                test("IMPORT {}\n".format(os.path.basename(fn)[:-5]) + "\n".join(re.sub(r"^ \|\s*[>:|]", "", x) for x in doc if x.startswith(" | ")))
            except StopIteration:
                firstline = next(itertools.dropwhile(lambda x: not x[1][3:].strip(), full))
                undocumented.append("no example in {}:{} for {}".format(fn, firstline[0], firstline[1][3:].strip()))
Example #28
def dfs_singles(r, path, a, b, depth=0, debug=False):
    '''A leaf of the depth-first search in combinations of large primes: depth-first search
    within ``singles`` = elements of the ``unknown`` array.'''
    if abs(r) < 1e-12:
        yield path
    elif r > 0:
        try:
            i_min = next(it.dropwhile(lambda ix: 1 - 1e-10 > r * ix[1] * ix[1], enumerate(a)))[0]
        except StopIteration:
            i_min = len(a)
        try:
            i_max = next(it.dropwhile(lambda ix: ix[1] > r - 1e-10, enumerate(b)))[0]
        except StopIteration:
            i_max = len(a)
        for i in range(i_min, i_max):
            for p in dfs_singles(r - 1. / (a[i] * a[i]), path | set([a[i]]), a[i + 1:], b[i + 1:], debug=debug):
                yield p
Example #29
def problem49():
    primes = primesUpTo(10000)
    for a in dropwhile(lambda x: x <= 1487, primes):
        # the bound comes from wanting c = 2b - a ≤ 10000
        for b in dropwhile(lambda x: x <= a, primes):
            if b >= (10000 + a) / 2:
                break
            c = b + (b - a)
            # Do the same digits first since I think that is the most time
            # consuming part
            if sameDigits(a, b) and sameDigits(b, c) and isPrime(c):
                return int(str(a) + str(b) + str(c))
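primesUpTo, sameDigits and isPrime come from elsewhere in the source; rough stand-ins, as an assumption:

from itertools import dropwhile

def primesUpTo(n):
    # simple sieve of Eratosthenes
    sieve = [True] * (n + 1)
    sieve[0:2] = [False, False]
    for i in range(2, int(n ** 0.5) + 1):
        if sieve[i]:
            sieve[i*i::i] = [False] * len(sieve[i*i::i])
    return [i for i, p in enumerate(sieve) if p]

def sameDigits(a, b):
    return sorted(str(a)) == sorted(str(b))

def isPrime(n):
    return n > 1 and all(n % d for d in range(2, int(n ** 0.5) + 1))

With these in place, problem49() should return 296962999629, the concatenation of the arithmetic prime sequence 2969, 6299, 9629.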
Example #30
def populate_report_data(start_date, end_date, domain, runner, locations=None, strict=True):
    # first populate all the warehouse tables for all facilities
    # hard coded to know this is the first date with data
    start_date = max(start_date, default_start_date())

    # For QA purposes generate reporting data for only some small part of data.
    if not ILSGatewayConfig.for_domain(domain).all_stock_data:
        if locations is None:
            locations = _get_test_locations(domain)
        facilities = filter(lambda location: location.location_type == 'FACILITY', locations)
        non_facilities_types = ['DISTRICT', 'REGION', 'MSDZONE', 'MOHSW']
        non_facilities = []
        for location_type in non_facilities_types:
            non_facilities.extend(filter(lambda location: location.location_type == location_type, locations))
    else:
        facilities = Location.filter_by_type(domain, 'FACILITY')
        non_facilities = list(Location.filter_by_type(domain, 'DISTRICT'))
        non_facilities += list(Location.filter_by_type(domain, 'REGION'))
        non_facilities += list(Location.filter_by_type(domain, 'MSDZONE'))
        non_facilities += list(Location.filter_by_type(domain, 'MOHSW'))

    if runner.location:
        if runner.location.location_type.name.upper() != 'FACILITY':
            facilities = []
            non_facilities = itertools.dropwhile(
                lambda location: location._id != runner.location.location_id,
                non_facilities
            )
        else:
            facilities = itertools.dropwhile(
                lambda location: location._id != runner.location.location_id,
                facilities
            )

    facilities_chunked_list = chunked(facilities, 5)
    for chunk in facilities_chunked_list:
        res = chain(process_facility_warehouse_data.si(fac, start_date, end_date, runner) for fac in chunk)()
        res.get()

    non_facilities_chunked_list = chunked(non_facilities, 50)

    # then populate everything above a facility off a warehouse table
    for chunk in non_facilities_chunked_list:
        res = chain(
            process_non_facility_warehouse_data.si(org, start_date, end_date, runner, strict)
            for org in chunk
        )()
        res.get()
    runner.location = None
    runner.save()
    # finally go back through the history and initialize empty data for any
    # newly created facilities
    update_historical_data(domain)
Example #31
def insta_posts_py(query,
                   scope,
                   max_posts,
                   scrape_comments,
                   save_path="",
                   since="",
                   until=""):
    """
	Run custom search

	Fetches data from Instagram via instaloader.
	"""
    # this is useful to include in the results because researchers are
    # always thirsty for them hashtags
    hashtag = re.compile(r"#([^\s,.+=-]+)")
    mention = re.compile(r"@([a-zA-Z0-9_]+)")

    queries = query.split(",")

    if since != "" and until != "":
        since = since.split("-")
        until = until.split("-")

        for item in range(len(since)):
            since[item] = int(since[item])

        for item in range(len(until)):
            until[item] = int(until[item])

        since = datetime(since[0], since[1], since[2])
        until = datetime(until[0], until[1], until[2])

# return queries
    posts = []

    # for each query, get items
    for query in queries:
        chunk_size = 0
        print("Retrieving posts ('%s')" % query)
        try:
            if scope == "hashtag":
                query = query.replace("#", "")
                hashtag_obj = instaloader.Hashtag.from_name(
                    instagram.context, query)
                chunk = hashtag_obj.get_posts()
            elif scope == "username":
                query = query.replace("@", "")
                profile = instaloader.Profile.from_username(
                    instagram.context, query)
                chunk = profile.get_posts()
            else:
                print("Invalid search scope for instagram scraper: %s" %
                      repr(scope))
                return []

            # "chunk" is a generator so actually retrieve the posts next
            posts_processed = 0
            # go through posts, and retrieve comments
            results = []
            results_posts = []
            for post in chunk:

                chunk_size += 1
                print("Retrieving posts ('%s', %i posts)" %
                      (query, chunk_size))
                if posts_processed >= max_posts:
                    break
                try:
                    posts.append(chunk.__next__())
                    posts_processed += 1

                    comments_bit = " and comments" if scrape_comments == True else ""

                    if since != "" and until != "":
                        posts = takewhile(
                            lambda p: p.date > until,
                            dropwhile(lambda p: p.date > since, posts))

                    for post in posts:

                        print("Retrieving metadata%s for post %i" %
                              (comments_bit, posts_processed))

                        thread_id = post.shortcode
                        try:
                            results_posts.append({
                                "id":
                                str(thread_id),
                                "thread_id":
                                str(thread_id),
                                "parent_id":
                                str(thread_id),
                                "body":
                                post.caption
                                if post.caption is not None else "",
                                "author":
                                post.owner_username,
                                "timestamp":
                                post.date_utc.timestamp(),
                                "type":
                                "video" if post.is_video else "picture",
                                "url":
                                post.video_url if post.is_video else post.url,
                                "thumbnail_url":
                                post.url,
                                "hashtags":
                                ",".join(post.caption_hashtags),
                                "usertags":
                                ",".join(post.tagged_users),
                                "mentioned":
                                ",".join(
                                    mention.findall(post.caption) if post.
                                    caption else ""),
                                "num_likes":
                                post.likes,
                                "num_comments":
                                post.comments,
                                "level":
                                "post",
                                "query":
                                query
                            })
                        except (instaloader.QueryReturnedNotFoundException,
                                instaloader.ConnectionException):
                            pass

                        if not scrape_comments:
                            if save_path != "":
                                save_csv(save_path, results_posts)
                            results.append(results_posts)
                            continue

                        if (posts_processed % 10 == 0):
                            wait_time = randint(300, 500)
                            print("Waiting for " + str(wait_time) + " seconds.")
                            sleep(wait_time)
                        else:
                            wait_time = randint(20, 30)
                            print("Waiting for " + str(wait_time) + " seconds.")
                            sleep(wait_time)

                        try:
                            for comment in post.get_comments():
                                answers = [
                                    answer for answer in comment.answers
                                ]

                                try:
                                    results_posts.append({
                                        "id":
                                        str(comment.id),
                                        "thread_id":
                                        str(thread_id),
                                        "parent_id":
                                        str(thread_id),
                                        "body":
                                        comment.text,
                                        "author":
                                        comment.owner.username,
                                        "timestamp":
                                        comment.created_at_utc.timestamp(),
                                        "type":
                                        "comment",
                                        "url":
                                        "",
                                        "hashtags":
                                        ",".join(hashtag.findall(
                                            comment.text)),
                                        "usertags":
                                        "",
                                        "mentioned":
                                        ",".join(mention.findall(
                                            comment.text)),
                                        "num_likes":
                                        comment.likes_count if hasattr(
                                            comment, "likes_count") else 0,
                                        "num_comments":
                                        len(answers),
                                        "level":
                                        "comment",
                                        "query":
                                        query
                                    })
                                except instaloader.QueryReturnedNotFoundException:
                                    pass

                                # instagram only has one reply depth level at the time of
                                # writing, represented here
                                for answer in answers:
                                    try:
                                        results_posts.append({
                                            "id":
                                            str(answer.id),
                                            "thread_id":
                                            str(thread_id),
                                            "parent_id":
                                            str(comment.id),
                                            "body":
                                            answer.text,
                                            "author":
                                            answer.owner.username,
                                            "timestamp":
                                            answer.created_at_utc.timestamp(),
                                            "type":
                                            "comment",
                                            "url":
                                            "",
                                            "hashtags":
                                            ",".join(
                                                hashtag.findall(answer.text)),
                                            "usertags":
                                            "",
                                            "mentioned":
                                            ",".join(
                                                mention.findall(answer.text)),
                                            "num_likes":
                                            answer.likes_count if hasattr(
                                                answer, "likes_count") else 0,
                                            "num_comments":
                                            0,
                                            "level":
                                            "answer",
                                            "query":
                                            query
                                        })
                                    except instaloader.QueryReturnedNotFoundException:
                                        pass
                        except instaloader.QueryReturnedNotFoundException:
                            pass
                        if save_path != "":
                            save_csv(save_path, results_posts)
                except instaloader.QueryReturnedNotFoundException:
                    pass
        except (instaloader.QueryReturnedNotFoundException,
                instaloader.ConnectionException):
            # data not available...? this happens sometimes, not clear why
            pass
        results.append(results_posts)
    return results
Example #32
 def burn_blanks(c):
     return list(dropwhile(self._is_blank, c))
Example #33
	def __call__(self, iterator):
		return itertools.dropwhile(self.function, iterator)
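Only the __call__ is shown; a sketch of the enclosing class (the __init__ storing `function` is an assumption), which turns dropwhile into a reusable filter object:

import itertools

class DropWhile:
    # hypothetical enclosing class for the __call__ above
    def __init__(self, function):
        self.function = function

    def __call__(self, iterator):
        return itertools.dropwhile(self.function, iterator)

skip_negatives = DropWhile(lambda x: x < 0)
print(list(skip_negatives([-2, -1, 0, 1, -5])))   # -> [0, 1, -5]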
Example #34
import itertools

l_1 = [1, 2, 3, 4, 5, 6, 7, 8]
print('List: ', l_1)
print('Filtered Elements greater than 4: ', list(filter(lambda x: x > 4, l_1)))

print(
    '\n------------------------------ Compress Filter ------------------------------'
)

l_2 = ['a', 'b', 'c', 'd', 'e']
selectors = [True, False, 1, 0]

print(f'Using Compress Filter on: {l_2}\nUsing Selector: {selectors}')
# 1-to-1 mapping, like zip: a -> True, b -> False, c -> 1, d -> 0; 'e' has no selector, so it is dropped
print('Compress Filtered: ', list(itertools.compress(l_2, selectors)))

print(
    '\n------------------------------ Takewhile Filter ------------------------------'
)

l_3 = [1, 3, 5, 2, 0]
print('List: ', l_3)
print('Takewhile for (< 5): ', list(itertools.takewhile(lambda x: x < 5, l_3)))

print(
    '\n------------------------------ Dropwhile Filter ------------------------------'
)

l_4 = [1, 3, 5, 2, 0]
print('List: ', l_4)
print('Dropwhile for (< 5): ', list(itertools.dropwhile(lambda x: x < 5, l_4)))
Example #35
    def __init__(self, experiment_config_name,
                 extra_dict={}, config_roots=[''], getexp=False):
        '''Read experiment config to get basic settings
        
        TODO: probably nicer if default experiment is given as argument
        '''

        # State variables
        self.version_info_missing = False

        #
        # Helper functions
        #

        def split_jobs(config):
            '''Post-process job definition to allow for shared configs as [[job1, job2]]'''
            if 'jobs' in config:
                sep = re.compile(r'\s*,\s*')
                for subjobs, subconfig in config['jobs'].iteritems():
                    if re.search(sep, subjobs):
                        for subjob in re.split(sep, subjobs):
                            if subjob in config['jobs']:
                                config['jobs'][subjob].merge(subconfig.dict())
                            else:
                                config['jobs'][subjob] = subconfig.dict()
                        del config['jobs'][subjobs]

        def get_config_name(lib_name, base_name):
            '''Cycle through config path until a match is found.
               
               Return simple path otherwise'''
            config_name = os.path.join(lib_name, base_name)
            for config_root in config_roots:
                tentative_name = os.path.join(config_root, config_name)
                if os.path.exists(tentative_name):
                    config_name = tentative_name
                    break
            return config_name

        def read_value(value):
            if os.path.exists(value):
                stream = open(value)
                result = stream.read().strip()
                stream.close()
            else:
                result = ''
            return result

        def sec2time(seconds):
            '''Create time string (HH:MM:SS) from second of day'''
            seconds = int(seconds)
            if seconds >= 86400:
                raise ValueError("invalid second of day '{0}'".format(seconds))
            minutes, s = divmod(seconds, 60)
            h, m = divmod(minutes, 60)
            return "{0:02}:{1:02}:{2:02}".format(h, m, s)

        def split_date(value):
            '''Re-format datetime string to list for use in namelists'''
            match = re.match(r'^0*(\d+)-0*(\d+)-0*(\d+)'
                             r'([T ]0*(\d+)(:0*(\d+)(:0*(\d+))?)?)?$', value)
            if match:
                return [match.groups('0')[i] for i in [0,1,2,4,6,8]]

            match = re.match(r'^0*(\d+?)(\d{2})(\d{2})'
                             r'([T ]0*(\d+)(:0*(\d+)(:0*(\d+))?)?)?$', value)
            if match:
                return [match.groups('0')[i] for i in [0,1,2,4,6,8]]
                
            raise ValueError("invalid date/time '{0}'".format(value))

        def add_years(value, years):
            '''Add specified number of years (possibly negative) to a date'''
            years = int(years)
            dt = map(int, split_date(value))
            dt[0] += years
            return "{0:+05}-{1:02}-{2:02}".format(*dt).lstrip('+')

        def add_days(value, days):
            '''Add specified number of days (possibly negative) to a date'''
            def leap(year):
                return (not year % 4) and (not (not year % 100) or (not year % 400)) 
            def monlen(year, mon):
                monlens = (0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 0)
                return monlens[mon] + (mon == 2 and leap(year))
            def add_days_(year, mon, day, days):
                while True:
                    if mon == 0:
                        year -= 1
                        mon = 12
                        day = monlen(year, 12)
                        continue
                    if mon == 13:
                        year += 1
                        mon = 1
                        day = 1
                        continue
                    if day + days <= 0:
                        days += day
                        mon -= 1
                        day = monlen(year, mon)
                        continue
                    if day + days > monlen(year, mon):
                        days -= monlen(year, mon) - day + 1
                        mon += 1
                        day = 1
                        continue
                    day += days
                    break

                return (year, mon, day)

            days = int(days)
            dt = map(int, split_date(value))
            dt = add_days_(dt[0], dt[1], dt[2], days)
            return "{0:+05}-{1:02}-{2:02}".format(*dt).lstrip('+')

        def eval_value(value):
            '''
                Evaluate key as python expression,
                return as string or sequence of strings.
            '''
            result = eval(value)
            if isinstance(result, (list, tuple)):
                result = map(str, result)
            else:
                result = str(result)
            return result

        def eval_value_string(value):
            '''
                Evaluate key as python expression,
                return as string or sequence of strings.
            '''
            result = eval_value(value)
            if isinstance(result, (list, tuple)):
                result = ", ".join(result)
            return result

        def eval_expression(value):
            '''
                Check if value is a supported expression.
                If so, evaluate and return result, otherwise just pass through.
            '''
            match = re.match(r'^eval\((.*)\)$', value, re.S)
            if match:
                return eval_value(match.group(1))

            match = re.match(r'^evals\((.*)\)$', value, re.S)
            if match:
                return eval_value_string(match.group(1))

            match = re.match(r'^add_(years|days)\(\s*([-\d]+([T ][\d:]+)?)\s*,\s*([-+]?\d+)\s*\)$', value, re.S)
            if match:
                if match.group(1) == 'days':
                    return add_days(match.group(2), match.group(4))
                return add_years(match.group(2), match.group(4))

            match = re.match(r'^split_date\((.*)\)$', value, re.S)
            if match:
                return split_date(match.group(1))

            match = re.match(r'^sec2time\((.*)\)$', value, re.S)
            if match:
                return sec2time(match.group(1))

            match = re.match(r'^read\((.*)\)$', value, re.S)
            if match:
                return read_value(match.group(1))

            return value

        # Interpolate and evaluate keys if they are an expression
        def eval_key(section, key):
            try:
                value = section[key]
                if isinstance(value, (list, tuple)):
                    value = map(eval_expression, value)
                elif isinstance(value, basestring):
                    value = eval_expression(value)
                if isinstance(value, (list, tuple)):
                    value = [v.replace('$', '$$') for v in value]
                elif isinstance(value, basestring):
                    value = value.replace('$', '$$')
            except (InterpolationError, ValueError) as error:
                raise ExpConfigError(error.message, key)
            section[key] = value

        # Undo remaining changes from walk with eval_key
        def uneval_key(section, key):
            try:
                value = section[key]
                if isinstance(value, (list, tuple)):
                    value = [v.replace('$$', '$') for v in value]
                elif isinstance(value, basestring):
                    value = value.replace('$$', '$')
            except (InterpolationError, ValueError) as error:
                raise ExpConfigError(error.message, key)
            section[key] = value

        # Move version info from local config to global list
        def register_version(pre_config, config_versions):
            if 'VERSION_' in pre_config:
                config_versions.append(pre_config['VERSION_'])
                del pre_config['VERSION_']
            else:
                self.version_info_missing = True

        #
        # Method body
        #

        # Pre-read basic experiment settings

        pre_config = None
        setup_config_name = get_config_name('', ExpConfig.setup_config_name)
        if os.path.exists(setup_config_name):
            pre_config = ConfigObj(setup_config_name, interpolation=False)
        user_config = ConfigObj(experiment_config_name, interpolation=False)
        if pre_config:
            pre_config.merge(user_config)
        else:
            pre_config = user_config

        experiment_type = extra_dict.get('EXP_TYPE', pre_config['EXP_TYPE'])
        # Empty environment should load default
        environment = extra_dict.get('ENVIRONMENT', 
                      pre_config.get('ENVIRONMENT',
                      ExpConfig.default_name))
        # Options should always be treated as a list
        setup_options = extra_dict.get('SETUP_OPTIONS',
                        pre_config.get('SETUP_OPTIONS',
                        ''))
        if isinstance(setup_options, basestring):
            if setup_options:
                setup_options = [setup_options]
            else:
                setup_options = []
        exp_options = extra_dict.get('EXP_OPTIONS',
                      pre_config.get('EXP_OPTIONS',
                      ''))
        if isinstance(exp_options, basestring):
            if exp_options:
                exp_options = [exp_options]
            else:
                exp_options = []
        options = setup_options + exp_options
        # Backwards compatibility ENVIRONMENT -> QUEUE_TYPE
        if environment == ExpConfig.default_name and 'QUEUE_TYPE' in pre_config:
            feedback.warning("found obsolete keyword 'QUEUE_TYPE'; "
                             "should be replaced by 'ENVIRONMENT'")
            environment = pre_config['QUEUE_TYPE']
        # Load default if environment was deliberately set to empty
        if not environment:
            environment = ExpConfig.default_name

        pre_config = None
        user_config = None

        # Start from empty configuration

        pre_config = ConfigObj(interpolation=False)
        config_versions = []

        # Get default experiment id from file name
        pre_config[ExpConfig.id_name] = os.path.splitext(
            os.path.basename(experiment_config_name)
        )[0]

        # Read Environment

        env_dict = dict(os.environ)
        if not getexp:
            # Mask literal dollar characters
            for key, value in env_dict.iteritems():
                env_dict[key] = value.replace('$', '$$')
        pre_config.merge({'DEFAULT': {}})
        for key, value in sorted(env_dict.iteritems()):
            pre_config['DEFAULT'][key] = value

        # Read experiment settings from library (default and type specific)

        lib_config_name = get_config_name(ExpConfig.exp_lib_dir,
                                          ExpConfig.default_name+'.config')
        pre_config.merge(ConfigObj(lib_config_name, interpolation=False))
        split_jobs(pre_config)
        register_version(pre_config, config_versions)

        if os.path.exists(setup_config_name):
            pre_config.merge(ConfigObj(setup_config_name, interpolation=False))
            split_jobs(pre_config)
            register_version(pre_config, config_versions)

        lib_config_name = get_config_name(ExpConfig.exp_lib_dir, 
                                          experiment_type+'.config')
        if os.path.exists(lib_config_name):
            pre_config.merge(ConfigObj(lib_config_name, interpolation=False))
            split_jobs(pre_config)
            register_version(pre_config, config_versions)
        else:
            feedback.warning("cannot find experiment config for '%s', "+
                             "using default only", experiment_type)

        for option in options:
            lib_config_name = get_config_name(ExpConfig.opt_lib_dir, 
                                              option+'.config')
            if os.path.exists(lib_config_name):
                pre_config.merge(ConfigObj(lib_config_name, interpolation=False))
                split_jobs(pre_config)
                register_version(pre_config, config_versions)
            else:
                feedback.warning("cannot find config for option '%s', using "+
                                 "default/experiment type only", option)

        # Read host environment settings from library

        lib_config_name = get_config_name(ExpConfig.env_lib_dir,
                                          environment+'.config')

        if os.path.exists(lib_config_name):
            pre_config.merge(ConfigObj(lib_config_name, interpolation=False))
            register_version(pre_config, config_versions)

        # Warn user if at least one config had no version info
        if self.version_info_missing:
            feedback.info("version info for standard config is incomplete")

        # Re-read config to allow overriding default settings
        # TODO: probably nicer if default experiment is given as argument
        experiment_config = ConfigObj(experiment_config_name,
                                      interpolation=False)
        pre_config.merge(experiment_config)
        split_jobs(pre_config)

        # Add extra dictionary
        pre_config.merge(extra_dict)

        # Backwards compatibility ENVIRONMENT -> QUEUE_TYPE
        pre_config['ENVIRONMENT'] = environment

        # Add complete versioning info
        if not getexp:
            pre_config['VERSIONS_'] = config_versions

        # Re-read merged config with interpolation set.
        # This works around incomprehensible inheritance of interpolation with
        # merge. Make sure that all values are interpolated

        config_lines = StringIO.StringIO()

        pre_config.write(config_lines)
        pre_config = None

        config_lines.seek(0)
        pre_config = ConfigObj(config_lines,
                               interpolation=False if getexp else 'template')

        # Extract experiment description from initial comment
        # if not set explicitly
        if 'EXP_DESCRIPTION' not in pre_config:
            is_empty = lambda s: re.match(r'^[\s#]*$', s)
            rm_comment = lambda s: re.sub(r'^\s*# ?', '', s)       
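            # Strip the leading comment markers, then trim blank lines from
            # both ends (each reversed()/dropwhile() pair trims one end).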
            pre_config['EXP_DESCRIPTION'] = "\n".join(
                reversed(list(
                    dropwhile(is_empty,
                        reversed(list(
                            dropwhile(is_empty,
                                map(rm_comment,
                                    experiment_config.initial_comment)
                            )
                        )) 
                    )
                ))
            )

        pre_config.walk(eval_key)

        # Re-read final config without interpolation.
        # This allows copying data without evaluation of version keywords.

        config_lines.seek(0)
        config_lines.truncate()

        pre_config.write(config_lines)
        pre_config = None

        config_lines.seek(0)
        ConfigObj.__init__(self, config_lines, interpolation=False)
        self.walk(uneval_key)
        
        self.experiment_id = self[ExpConfig.id_name]
        self.experiment_kind = re.sub(r'-\w+$', '', experiment_type)
Ejemplo n.º 36
0
def drop_until(iterable, low):
    for i in iterable:
        if i < low:
            continue
        yield i
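
# Note: unlike itertools.dropwhile, drop_until() skips *every* element below
# `low`, not just the leading run; for the monotonically increasing streams
# used below, the two behave identically.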


def take_until(iterable, high):
    for i in iterable:
        if i > high:
            break
        yield i


def take_between_v2(iterable, low, high):
    for i in take_until(drop_until(iterable, low), high):
        yield i


for i in take_between_v2(take_even(fibonacci()), 100, 1000):
    print(i)

print(
    list(
        itertools.takewhile(
            lambda i: i <= 1000,
            itertools.dropwhile(lambda i: i < 100,
                                filter(lambda i: i % 2 == 0, fibonacci())),
        )))
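
# Note: fibonacci() and take_even() are not defined in this excerpt. A minimal
# sketch of what they plausibly look like, so the loops above make sense:
def fibonacci():
    # Infinite Fibonacci generator: 0, 1, 1, 2, 3, 5, ...
    a, b = 0, 1
    while True:
        yield a
        a, b = b, a + b


def take_even(iterable):
    # Yield only the even numbers
    for i in iterable:
        if i % 2 == 0:
            yield i

# With these definitions, both versions above print the even Fibonacci
# numbers between 100 and 1000: 144 and 610.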
Ejemplo n.º 37
0
def hash_test_code(main_path):
    """Hashes file main_path."""
    with open(main_path) as main:
        test_code_hash = hashlib.sha256()
        for line in main:
            test_code_hash.update(line.encode())
    return test_code_hash.hexdigest()


PROFESSOR_TEST_CODE_HEXDIGEST = '22c0d504a3335886a369d75f72f07474b1d10599c294b1b45770e9ffdbc43b95'
PROFESSOR_CHIFFRE_HEXDIGEST = '60ff41b09e4e1011d3a5f33704ec53df319a248d1de48250a131b809a85cb2db'
PROFESSOR_CLAIR_HEXDIGEST = '4ef57703aad7ffd9f3129bb46c81a15308f1963e1f12ab00718f3569fde090f3'
CALLBACKS = pygit2.RemoteCallbacks(credentials=pygit2.KeypairFromAgent("git"))

with open('depots.txt') as remote_depot_names:
    for remote_depot_name in itertools.dropwhile(
            lambda line: line.startswith('#'), remote_depot_names):
        try:
            # Craft URL to clone given a depot name.
            remote_depot_name = remote_depot_name.rstrip()
            remote_depot_url = 'ssh://[email protected]/' + remote_depot_name + '.git'
            local_depot_path = remote_depot_name.replace('/', '-')
            print(local_depot_path, end=' ')

            # Clone the repo.
            if pygit2.clone_repository(remote_depot_url, local_depot_path, callbacks=CALLBACKS) \
                    is None:
                raise RuntimeError('-1')

            # Confirm test code is intact.
            if hash_test_code(local_depot_path + '/test/main.c') != PROFESSOR_TEST_CODE_HEXDIGEST or \
               hash_test_code(local_depot_path + '/test/chiffre.txt') != PROFESSOR_CHIFFRE_HEXDIGEST or \
Ejemplo n.º 38
0
 def skip(iter, N):
     from itertools import dropwhile
     return dropwhile(lambda n_rec: n_rec[0] < N, enumerate(iter))
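
# Hedged usage note: skip() yields the (index, record) pairs produced by
# enumerate(), not bare records, so callers unpack them, e.g.:
# for n, rec in skip(['a', 'b', 'c', 'd'], 2):
#     print(n, rec)   # prints "2 c", then "3 d"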
Ejemplo n.º 39
0
assert next(a) == 'b'
assert next(a) == 'c'
assert next(a) == 'a'

a = itertools.cycle(range(3))
assert next(a) == 0
assert next(a) == 1
assert next(a) == 2
assert next(a) == 0
assert next(a) == 1
assert next(a) == 2

#########################
## Tests for dropwhile ##
#########################
a = itertools.dropwhile(lambda x: x < 5, [1, 4, 6, 4, 1])
assert next(a) == 6
assert next(a) == 4
assert next(a) == 1

a = itertools.dropwhile(lambda x: x == 'p', 'pbrython')
assert list(a) == ['b', 'r', 'y', 't', 'h', 'o', 'n']

###########################
## Tests for filterfalse ##
###########################
a = itertools.filterfalse(lambda x: x % 2, range(10))
assert next(a) == 0
assert next(a) == 2
assert next(a) == 4
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#   @Time : 2021/10/19 10:57 AM
#   @Author : liuzh
#   @desc :
from itertools import dropwhile

# Skip the comment lines at the start of the file
with open('4.8.txt') as f:
    for line in dropwhile(lambda line: line.startswith('#'), f):
        print(line, end='')

# If the number of lines to skip is known, use islice, e.g. [3:]
from itertools import islice

items = ['a', 'b', 'c', 1, 4, 10, 15]
# Equivalent to [3:]
for x in islice(items, 3, None):
    print(x)

# Swapping None and 3 gives [:3]
for x in islice(items, None, 3):
    print(x)

# The original, manual way of skipping the header
with open('4.8.txt') as f:
    # Skip over initial comments
    while True:
        line = next(f, '')
        if not line.startswith('#'):
            break
    glbs.MinTumorDepth = 20
    glbs.MinNormalDepth = 8
    glbs.MultipleTumorNormalFreq = 5
    glbs.MinAD = 5
    glbs.MinAltFreq = 0.01

fin = open(args.maf)
ofh = open(args.output, "wt")

comments = ifilter(lambda L: L.startswith("#"), fin)
for line in comments:
    ofh.write(line)

fin.seek(0)

start = dropwhile(lambda L: L.lower().lstrip().startswith('#'), fin)
cin = csv.DictReader(start, delimiter="\t")
fnames = cin.fieldnames
fnames.append("Caller")
cout = csv.DictWriter(ofh, fieldnames=fnames, delimiter="\t")
cout.writeheader()

for recDict in cin:
    try:
        recDict["Caller"] = args.caller
        rec = Struct(**recDict)

        ## For both somatic and not somatic
        if rec.FILTER and rec.FILTER.find("LowQual") > -1:
            continue
Ejemplo n.º 42
0
def vowel(c):
    return c.lower() in 'aeiou'


print('{:*^60}'.format('filter()'))
print(list(filter(vowel, 'Aardvark')))
print(list(filter(lambda c: c.lower() in 'aeiou', 'Aardvark')))

import itertools
print('{:*^60}'.format('itertools.filterfalse()'))
print(list(itertools.filterfalse(vowel, 'Aardvark')))

print('{:*^60}'.format('itertools.dropwhile()'))
print(list(itertools.dropwhile(vowel, 'Aardvark')))

print('{:*^60}'.format('itertools.takewhile()'))
print(list(itertools.takewhile(vowel, 'Aardvark')))

print('{:*^60}'.format('itertools.compress()'))
print(list(itertools.compress('Aardvark', (1, 0, 1, 1, 0, 1))))

print('{:*^60}'.format('itertools.islice(, 4)'))
print(list(itertools.islice('Aardvark', 4)))

print('{:*^60}'.format('itertools.islice(, 4, 7)'))
print(list(itertools.islice('Aardvark', 4, 7)))

print('{:*^60}'.format('itertools.islice(, 1, 7, 2)'))
print(list(itertools.islice('Aardvark', 1, 7, 2)))
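
# For reference, the expected output of the calls above (worked by hand for
# 'Aardvark'):
# filter(vowel, ...)                      -> ['A', 'a', 'a']
# itertools.filterfalse(vowel, ...)       -> ['r', 'd', 'v', 'r', 'k']
# itertools.dropwhile(vowel, ...)         -> ['r', 'd', 'v', 'a', 'r', 'k']
# itertools.takewhile(vowel, ...)         -> ['A', 'a']
# itertools.compress(..., (1, 0, 1, 1, 0, 1)) -> ['A', 'r', 'd', 'a']
# itertools.islice(..., 4)                -> ['A', 'a', 'r', 'd']
# itertools.islice(..., 4, 7)             -> ['v', 'a', 'r']
# itertools.islice(..., 1, 7, 2)          -> ['a', 'd', 'a']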
Ejemplo n.º 43
0
 def removestart(self, file):
     """Remove the header of the file."""
     return list(itertools.dropwhile(lambda l: l.startswith("#"), file))
Ejemplo n.º 44
0
def make_messages(locale=None,
                  domain='django',
                  verbosity='1',
                  all=False,
                  extensions=None):
    """
    Uses the locale directory from the Django SVN tree or an application/
    project to process all
    """
    # Need to ensure that the i18n framework is enabled
    from django.conf import settings
    if settings.configured:
        settings.USE_I18N = True
    else:
        settings.configure(USE_I18N=True)

    from django.utils.translation import templatize

    if os.path.isdir(os.path.join('conf', 'locale')):
        localedir = os.path.abspath(os.path.join('conf', 'locale'))
    elif os.path.isdir('locale'):
        localedir = os.path.abspath('locale')
    else:
        raise CommandError(
            "This script should be run from the Django SVN tree or your project or app tree. If you did indeed run it from the SVN checkout or your project or application, maybe you are just missing the conf/locale (in the django tree) or locale (for project and application) directory? It is not created automatically, you have to create it by hand if you want to enable i18n for your project or application."
        )

    if domain not in ('django', 'djangojs'):
        raise CommandError(
            "currently makemessages only supports domains 'django' and 'djangojs'"
        )

    if (locale is None and not all) or domain is None:
        # backwards compatible error message
        if not sys.argv[0].endswith("make-messages.py"):
            message = "Type '%s help %s' for usage.\n" % (os.path.basename(
                sys.argv[0]), sys.argv[1])
        else:
            message = "usage: make-messages.py -l <language>\n   or: make-messages.py -a\n"
        raise CommandError(message)

    languages = []
    if locale is not None:
        languages.append(locale)
    elif all:
        languages = [
            el for el in os.listdir(localedir) if not el.startswith('.')
        ]

    for locale in languages:
        if verbosity > 0:
            print "processing language", locale
        basedir = os.path.join(localedir, locale, 'LC_MESSAGES')
        if not os.path.isdir(basedir):
            os.makedirs(basedir)

        pofile = os.path.join(basedir, '%s.po' % domain)
        potfile = os.path.join(basedir, '%s.pot' % domain)

        if os.path.exists(potfile):
            os.unlink(potfile)

        all_files = []
        for (dirpath, dirnames, filenames) in os.walk("."):
            all_files.extend([(dirpath, f) for f in filenames])
        all_files.sort()
        for dirpath, file in all_files:
            file_base, file_ext = os.path.splitext(file)
            if domain == 'djangojs' and file_ext == '.js':
                if verbosity > 1:
                    sys.stdout.write('processing file %s in %s\n' %
                                     (file, dirpath))
                src = open(os.path.join(dirpath, file), "rb").read()
                src = pythonize_re.sub('\n#', src)
                open(os.path.join(dirpath, '%s.py' % file), "wb").write(src)
                thefile = '%s.py' % file
                cmd = 'xgettext -d %s -L Perl --keyword=gettext_noop --keyword=gettext_lazy --keyword=ngettext_lazy:1,2 --from-code UTF-8 -o - "%s"' % (
                    domain, os.path.join(dirpath, thefile))
                (stdin, stdout, stderr) = os.popen3(cmd, 't')
                msgs = stdout.read()
                errors = stderr.read()
                if errors:
                    raise CommandError(
                        "errors happened while running xgettext on %s\n%s" %
                        (file, errors))
                old = '#: ' + os.path.join(dirpath, thefile)[2:]
                new = '#: ' + os.path.join(dirpath, file)[2:]
                msgs = msgs.replace(old, new)
                if os.path.exists(potfile):
                    # Strip the header
                    msgs = '\n'.join(dropwhile(len, msgs.split('\n')))
                else:
                    msgs = msgs.replace('charset=CHARSET', 'charset=UTF-8')
                if msgs:
                    open(potfile, 'ab').write(msgs)
                os.unlink(os.path.join(dirpath, thefile))
            elif domain == 'django' and (file_ext == '.py'
                                         or file_ext in extensions):
                thefile = file
                if file_ext in extensions:
                    src = open(os.path.join(dirpath, file), "rb").read()
                    thefile = '%s.py' % file
                    open(os.path.join(dirpath, thefile),
                         "wb").write(templatize(src))
                if verbosity > 1:
                    sys.stdout.write('processing file %s in %s\n' %
                                     (file, dirpath))
                cmd = 'xgettext -d %s -L Python --keyword=gettext_noop --keyword=gettext_lazy --keyword=ngettext_lazy:1,2 --keyword=ugettext_noop --keyword=ugettext_lazy --keyword=ungettext_lazy:1,2 --from-code UTF-8 -o - "%s"' % (
                    domain, os.path.join(dirpath, thefile))
                (stdin, stdout, stderr) = os.popen3(cmd, 't')
                msgs = stdout.read()
                errors = stderr.read()
                if errors:
                    raise CommandError(
                        "errors happened while running xgettext on %s\n%s" %
                        (file, errors))
                if thefile != file:
                    old = '#: ' + os.path.join(dirpath, thefile)[2:]
                    new = '#: ' + os.path.join(dirpath, file)[2:]
                    msgs = msgs.replace(old, new)
                if os.path.exists(potfile):
                    # Strip the header
                    msgs = '\n'.join(dropwhile(len, msgs.split('\n')))
                else:
                    msgs = msgs.replace('charset=CHARSET', 'charset=UTF-8')
                if msgs:
                    open(potfile, 'ab').write(msgs)
                if thefile != file:
                    os.unlink(os.path.join(dirpath, thefile))

        if os.path.exists(potfile):
            (stdin, stdout,
             stderr) = os.popen3('msguniq --to-code=utf-8 "%s"' % potfile, 'b')
            msgs = stdout.read()
            errors = stderr.read()
            if errors:
                raise CommandError(
                    "errors happened while running msguniq\n%s" % errors)
            open(potfile, 'w').write(msgs)
            if os.path.exists(pofile):
                (stdin, stdout, stderr) = os.popen3(
                    'msgmerge -q "%s" "%s"' % (pofile, potfile), 'b')
                msgs = stdout.read()
                errors = stderr.read()
                if errors:
                    raise CommandError(
                        "errors happened while running msgmerge\n%s" % errors)
            open(pofile, 'wb').write(msgs)
            os.unlink(potfile)
Ejemplo n.º 45
0
# (output of a statement truncated from this excerpt)
# ['foo', 'bar', 'ls', '/some/dir', 0, 1, 2, 3, 4]


from itertools import chain
# cmd and numbers are not defined in this excerpt; the values below are
# inferred from the printed output:
cmd = ['ls', '/some/dir']
numbers = [0, 1, 2, 3, 4]
print (list(chain.from_iterable([cmd, numbers])))
# ['ls', '/some/dir', 0, 1, 2, 3, 4]


from itertools import compress
letters = 'ABCDEFG'
bools = [True, False, True, True, False]
print (list(compress(letters, bools)))  # keeps items from 'letters' whose matching selector in 'bools' is True
# ['A', 'C', 'D']


from itertools import dropwhile
print (list(dropwhile(lambda x: x < 5, [1, 4, 6, 4, 1])))  # drops elements as long as the filter criteria is True
# [6, 4, 1]


def greater_than_five(x):
    return x > 5
# Once we hit a value that fails the predicate (1 is not greater than 5),
# that value and ALL the values after it are kept
print (list(dropwhile(greater_than_five, [6, 7, 8, 9, 1, 2, 3, 10])))
# [1, 2, 3, 10]


from itertools import filterfalse
# filterfalse only returns the values for which the predicate evaluates to False
print (list(filterfalse(greater_than_five, [6, 7, 8, 9, 1, 2, 3, 10])))
# [1, 2, 3]
Ejemplo n.º 46
0
# Py2 excerpt; these imports are needed by the code below:
import collections
import csv
import itertools
import urllib

url = 'http://download.geonames.org/export/dump/countryInfo.txt'
fields = 'ISO', 'ISO3', 'ISOnumeric', 'fips', 'name', 'capital', 'area', 'population', 'continent', 'tld', 'currencyCode', 'currencyName', 'phone', 'postalCodeFormat', 'postalCodeRegex', 'languages', 'id', 'neighbours', 'equivalentFipsCode'
split_to_set = lambda s: set(s.split(','))
types = {
    'area': float,
    'id': int,
    'population': int,
    'ISOnumeric': int,
    'languages': split_to_set,
    'neighbours': split_to_set
}

f = urllib.urlopen(url)
Country = collections.namedtuple('Country', fields)
source = itertools.dropwhile(lambda l: l.startswith('#'), f)
reader = csv.DictReader(source, fields, delimiter='\t')

print 'import collections'
print 'Country = collections.namedtuple(\'Country\', {})'.format(fields)
print 'countries = ['

for line in reader:
    for field in fields:
        t = types.get(field, str)
        attr = line[field].strip()
        line[field] = t(attr) if attr else None
    print '    {},'.format(Country(**line))

print ']'
Ejemplo n.º 47
0
def go_data(ofile):
    """Skip header.

    the first next() call of the returned iterator will be the @data line"""
    return itertools.dropwhile(lambda x: not r_datameta.match(x), ofile)
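
# r_datameta is not defined in this excerpt. Assuming ARFF-style input, where
# an "@data" line separates the header from the data section, it is plausibly
# a compiled pattern such as:
import re

r_datameta = re.compile(r'^\s*@data\b', re.IGNORECASE)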
Ejemplo n.º 48
0
def main():
    entries = read_data()

    view_names = set()
    interesting_attributes = Counter()

    for i in entries:
        view_names.add(i['view_name'])
        pathdir = i['path'].split('/')[1:]
        for j in pathdir:
            interesting_attributes[j] += 1

    for key, count in dropwhile(lambda key_count: key_count[1] >= S['il'], interesting_attributes.most_common()):
        del interesting_attributes[key]

    view_names = list(view_names)
    i2n, n2i = dict(), dict()
    for i, n in enumerate(view_names):
        i2n[i] = n
        n2i[n] = i

    interesting_attributes = interesting_attributes.keys()
    i2a, a2i = dict(), dict()
    for i, a in enumerate(interesting_attributes):
        i2a[i] = a
        a2i[a] = i

    global_step = tf.Variable(0., False, dtype=tf.float32)

    x = tf.placeholder(tf.float32, [None, len(interesting_attributes)], name = 'input')
    y_ = tf.placeholder(tf.float32, [None, len(view_names)], name = 'label')
    W = tf.Variable(tf.random_normal([len(interesting_attributes), len(view_names)]), dtype = tf.float32)
    b = tf.Variable(tf.random_normal([len(view_names)], dtype = tf.float32))
    linear = tf.matmul(x, W) + b
    pred = tf.argmax(tf.nn.softmax(linear), 1)
    actual = tf.argmax(y_, 1)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(pred, actual), tf.float32))
    loss = tf.losses.softmax_cross_entropy(y_, linear)
    learning_rate = tf.train.exponential_decay(S['lr'], global_step, 500, 0.5, staircase = True)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    train = optimizer.minimize(loss, global_step)
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        tf.set_random_seed(0)
        sess.run(init)
        for i in range(S['ti']):
            feed_indices = np.random.randint(low = 0, high = len(entries), size = S['bs'])
            feed_input_arrays = []
            feed_labels = np.array([[0.] * len(view_names) for _ in range(S['bs'])], dtype = np.float32)
            for ji, j in enumerate(feed_indices):
                ia = np.zeros(len(interesting_attributes), dtype = np.float32)
                feed_labels[ji][n2i[entries[j]['view_name']]] = 1.
                for k in entries[j]['path'].split('/')[1:]:
                    if k in interesting_attributes:
                        ia[a2i[k]] = 1.
                feed_input_arrays.append(ia)
            feed_input = np.stack(feed_input_arrays)

            l, a, _ = sess.run((loss, accuracy, train), feed_dict={x: feed_input, y_: feed_labels})
            print('batch {:4}/{}: accuracy = {:.2f}%, loss = {}'.format(i+1, S['ti'], a*100, l))

        test_indices = np.random.randint(low = 0, high = len(entries), size = S['tc'])
        test_input_arrays = []
        test_labels = np.array([[0.] * len(view_names) for _ in range(S['tc'])], dtype = np.float32)
        for ii, i in enumerate(test_indices):
            ia = np.zeros(len(interesting_attributes), dtype = np.float32)
            test_labels[ii][n2i[entries[i]['view_name']]] = 1
            for j in entries[i]['path'].split('/')[1:]:
                if j in interesting_attributes:
                    ia[a2i[j]] = 1
            test_input_arrays.append(ia)
        test_input = np.stack(test_input_arrays)
        
        test_begin = datetime.now()
        test_p, test_a, acc, los = sess.run((pred, actual, accuracy, loss), feed_dict={x: test_input, y_: test_labels})
        test_end = datetime.now()
        test_elapse = test_end - test_begin
        print('{} tests completed in {} seconds\n  Accuracy: {:.2f}%\n  Loss: {}\n\n\n'.format(S['tc'], test_elapse.total_seconds(), acc*100, los))

        for ti, (tp, ta) in enumerate(zip(test_p, test_a)):
            if tp != ta:
                print('Mismatch:\n    Path: {}\n    Should obtain {},\n    got {}'.format(entries[test_indices[ti]]['path'], i2n[ta], i2n[tp]))
Ejemplo n.º 49
0
 def strip_head(items):
     return itertools.dropwhile(lambda i: i is self.SEPARATOR, items)
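
# strip_head() above is a method excerpt that closes over self.SEPARATOR; a
# hedged standalone equivalent takes the separator explicitly:
import itertools

def strip_head(items, separator):
    # Drop leading separator objects (identity comparison, as in the original)
    return itertools.dropwhile(lambda i: i is separator, items)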
Ejemplo n.º 50
0
 def __iter__(self):
     self.iterobj = itertools.dropwhile(lambda x: x.startswith('#'),
                                        self.iterInfo)
     return self
from itertools import dropwhile, islice

lines = [
    "# comment1",
    "# comment2",
    "# comment3",
    "data 1",
    "data 2",
    "data 3"
]

for line in dropwhile(lambda l: l.startswith("#"), lines):
    print(line)
print()

# if the number of items to skip is known, can use islice
for x in islice(lines, 3, None):    # passing None indicates everything after the first 3 items is desired, like [3:]
    print(x)
from itertools import dropwhile


def should_drop(x):
    print('Testing:', x)
    return x < 1


for i in dropwhile(should_drop, [-1, 0, 1, 2, -2]):
    print('Yielding:', i)
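
# For reference, the loop above prints the following (dropwhile stops testing
# once the predicate first fails, so -2 is yielded without being tested):
# Testing: -1
# Testing: 0
# Testing: 1
# Yielding: 1
# Yielding: 2
# Yielding: -2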
Ejemplo n.º 53
0
    def outgoing_connections(self):
        """Returns a list of all outgoing connections for this peer."""

        # Outgoing connections are on the right
        return list(
            dropwhile(lambda c: c.direction != OUTGOING, self.connections))
def get_after(sentinel, iterable):
    "Get the value after `sentinel` in an `iterable`"
    truncated = dropwhile(lambda el: el != sentinel, iterable)
    next(truncated)
    return next(truncated)
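
# A quick hedged check: get_after() drops everything before the sentinel,
# consumes the sentinel itself, and returns the following element; it raises
# StopIteration if the sentinel is absent or is the last element.
assert get_after('c', 'abcd') == 'd'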
Ejemplo n.º 55
0
 def dropwhile(self, predicate: Predicate) -> 'Stream[T]':
     return self.next(lambda xs: itertools.dropwhile(predicate, xs))
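
# Hedged note: the Stream class is not shown in this excerpt; presumably
# next() wraps a transformation over the underlying iterable, so
# stream.dropwhile(pred) lazily skips the leading elements for which
# pred holds.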
Ejemplo n.º 56
0
    def __parseLine(self, line):
        if line[0:1] == "#":
            # Debug line which we shall ignore as specified in CECPv2 specs
            return

#        log.debug("__parseLine: line=\"%s\"" % line.strip(), extra={"task":self.defname})
        parts = whitespaces.split(line.strip())

        if parts[0] == "pong":
            self.lastpong = int(parts[1])
            return

        # Illegal Move
        if parts[0].lower().find("illegal") >= 0:
            log.warning("__parseLine: illegal move: line=\"%s\", board=%s" \
                % (line.strip(), self.board), extra={"task":self.defname})
            if parts[-2] == "sd" and parts[-1].isdigit():
                print("depth", parts[-1], file=self.engine)
            return

        # A Move (Perhaps)
        if self.board:
            if parts[0] == "move":
                movestr = parts[1]
            # Old Variation
            elif d_plus_dot_expr.match(parts[0]) and parts[1] == "...":
                movestr = parts[2]
            else:
                movestr = False

            if movestr:
                log.debug("__parseLine: acquiring self.boardLock",
                          extra={"task": self.defname})
                self.waitingForMove = False
                self.readyForMoveNowCommand = False
                self.boardLock.acquire()
                try:
                    if self.engineIsInNotPlaying:
                        # If engine was set in pause just before the engine sent its
                        # move, we ignore it. However the engine has to know that we
                        # ignored it, and thus we step it one back
                        log.info("__parseLine: Discarding engine's move: %s" %
                                 movestr,
                                 extra={"task": self.defname})
                        print("undo", file=self.engine)
                        return
                    else:
                        try:
                            move = parseAny(self.board, movestr)
                        except ParsingError as e:
                            self.end(
                                WHITEWON if self.board.color == BLACK else
                                BLACKWON, WON_ADJUDICATION)
                            return

                        if validate(self.board, move):
                            self.board = None
                            self.returnQueue.put(move)
                            return
                        self.end(
                            WHITEWON if self.board.color == BLACK else
                            BLACKWON, WON_ADJUDICATION)
                        return
                finally:
                    log.debug("__parseLine(): releasing self.boardLock",
                              extra={"task": self.defname})
                    self.boardLock.release()
                    self.movecon.acquire()
                    self.movecon.notifyAll()
                    self.movecon.release()

        # Analyzing
        if self.engineIsInNotPlaying:
            if parts[:4] == ["0", "0", "0", "0"]:
                # Crafty doesn't analyze until it is out of book
                print("book off", file=self.engine)
                return

            match = anare.match(line)
            if match:
                depth, score, moves = match.groups()

                if "mat" in score.lower() or "#" in moves:
                    # Will look either like -Mat 3 or Mat3
                    scoreval = MATE_VALUE
                    if score.startswith('-'):
                        scoreval = -scoreval
                else:
                    scoreval = int(score)

                mvstrs = movere.findall(moves)
                try:
                    moves = listToMoves(self.board,
                                        mvstrs,
                                        type=None,
                                        validate=True,
                                        ignoreErrors=False)
                except:
                    # Errors may happen when parsing "old" lines from
                    # analyzing engines, which haven't yet noticed their new tasks
                    log.debug('Ignored an "old" line from analyzer: %s %s' %
                              (self.board, mvstrs),
                              extra={"task": self.defname})
                    return

                # Don't emit if we weren't able to parse moves, or if we have a move
                # to kill the opponent king - as it confuses many engines
                if moves and not self.board.board.opIsChecked():
                    self.emit("analyze", [(moves, scoreval, depth.strip())])

                return

        # Offers draw
        if parts[0:2] == ["offer", "draw"]:
            self.emit("accept", Offer(DRAW_OFFER))
            return

        # Resigns
        if parts[0] == "resign" or \
            (parts[0] == "tellics" and parts[1] == "resign"): # buggy crafty

            # Previously: if "resign" in parts,
            # however, this is too generic, since "hint", "bk",
            # "feature option=.." and possibly other, future CECPv2
            # commands can validly contain the word "resign" without this
            # being an intentional resign offer.

            self.emit("offer", Offer(RESIGNATION))
            return

        #if parts[0].lower() == "error":
        #    return

        #Tell User Error
        if parts[0] == "tellusererror":
            # We don't want to see our stop analyzer hack as an error message
            if "8/8/8/8/8/8/8/8" in "".join(parts[1:]):
                return
            # Create a non-modal non-blocking message dialog with the error:
            dlg = Gtk.MessageDialog(parent=None,
                                    flags=0,
                                    type=Gtk.MessageType.WARNING,
                                    buttons=Gtk.ButtonsType.CLOSE,
                                    message_format=None)

            # Use the engine name if already known, otherwise the defname:
            displayname = self.name
            if not displayname:
                displayname = self.defname

            # Compose the dialog text:
            dlg.set_markup(
                GObject.markup_escape_text(
                    _("The engine %s reports an error:") % displayname) +
                "\n\n" + GObject.markup_escape_text(" ".join(parts[1:])))

            # handle response signal so the "Close" button works:
            dlg.connect("response", lambda dlg, x: dlg.destroy())

            dlg.show_all()
            return

        # Tell Somebody
        if parts[0][:4] == "tell" and \
                parts[0][4:] in ("others", "all", "ics", "icsnoalias"):

            log.info("Ignoring tell %s: %s" %
                     (parts[0][4:], " ".join(parts[1:])))
            return

        if "feature" in parts:
            # Some engines send features after done=1, so we will iterate after done=1 too
            done1 = False
            # We skip parts before 'feature', as some engines give us lines like
            # White (1) : feature setboard=1 analyze...e="GNU Chess 5.07" done=1
            parts = parts[parts.index("feature"):]
            for i, pair in enumerate(parts[1:]):

                # As "parts" is split with no thoughs on quotes or double quotes
                # we need to do some extra handling.

                if pair.find("=") < 0:
                    continue
                key, value = pair.split("=", 1)

                if not key in self.features:
                    continue

                if value.startswith('"') and value.endswith('"'):
                    value = value[1:-1]

                # If our pair was unfinished, like myname="GNU, we search the
                # rest of the pairs for a quotating mark.
                elif value[0] == '"':
                    rest = value[1:] + " " + " ".join(parts[2 + i:])
                    j = rest.find('"')
                    if j == -1:
                        log.warning("Missing endquotation in %s feature",
                                    extra={"task": self.defname})
                        value = rest
                    else:
                        value = rest[:j]

                elif value.isdigit():
                    value = int(value)

                if key in self.supported_features:
                    print("accepted %s" % key, file=self.engine)
                else:
                    print("rejected %s" % key, file=self.engine)

                if key == "done":
                    if value == 1:
                        done1 = True
                        continue
                    elif value == 0:
                        log.info("Adds %d seconds timeout" % TIME_OUT_SECOND,
                                 extra={"task": self.defname})
                        # This'll buy you some more time
                        self.timeout = time.time() + TIME_OUT_SECOND
                        self.returnQueue.put("not ready")
                        return

                if key == "smp" and value == 1:
                    self.options["cores"] = {
                        "name": "cores",
                        "type": "spin",
                        "default": 1,
                        "min": 1,
                        "max": 64
                    }
                elif key == "memory" and value == 1:
                    self.options["memory"] = {
                        "name": "memory",
                        "type": "spin",
                        "default": 32,
                        "min": 1,
                        "max": 4096
                    }
                elif key == "option" and key != "done":
                    option = self.__parse_option(value)
                    self.options[option["name"]] = option
                else:
                    self.features[key] = value

                if key == "myname" and not self.name:
                    self.setName(value)

            if done1:
                # Start a new game before using the engine:
                # (CECPv2 engines)
                print("new", file=self.engine)

                # We are now ready for play:
                self.emit("readyForOptions")
                self.emit("readyForMoves")
                self.returnQueue.put("ready")

        # A hack to get better names in protover 1.
        # Unfortunately it wont work for now, as we don't read any lines from
        # protover 1 engines. When should we stop?
        if self.protover == 1:
            if self.defname[0] in ''.join(parts):
                basis = self.defname[0]
                name = ' '.join(
                    itertools.dropwhile(lambda part: basis not in part, parts))
                self.features['myname'] = name
                if not self.name:
                    self.setName(name)
Ejemplo n.º 57
0
 def extract_plot(self, response):
     plot_css = 'article p ::text'
     plot = clean(response.css(plot_css).extract())
     plot = dropwhile(lambda rd: 'release:' not in rd.lower(), plot)
     plot = [p for p in plot if 'release:' not in p.lower()]
     return plot or clean(response.css(plot_css).extract())
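
# Behavior note: dropwhile() discards paragraphs up to (but not including)
# the first one containing 'release:'; the list comprehension then removes
# the 'release:' line itself, leaving only the text that follows it. If
# nothing survives, the method falls back to the full cleaned extract.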
Ejemplo n.º 58
0
def _rerun_as_results(dset, revrange, since, branch, onto, message):
    """Represent the rerun as result records.

    In the standard case, the information in these results will be used to
    actually re-execute the commands.
    """

    try:
        results = _revrange_as_results(dset, revrange)
    except ValueError as exc:
        yield get_status_dict("run", status="error", message=exc_str(exc))
        return

    ds_repo = dset.repo
    # Drop any leading commits that don't have a run command. These would be
    # skipped anyways.
    results = list(dropwhile(lambda r: "run_info" not in r, results))
    if not results:
        yield get_status_dict("run",
                              status="impossible",
                              ds=dset,
                              message=("No run commits found in range %s",
                                       revrange))
        return

    if onto is not None and onto.strip() == "":
        onto = results[0]["commit"] + "^"

    if onto and not ds_repo.commit_exists(onto):
        yield get_status_dict(
            "run",
            ds=dset,
            status="error",
            message=("Revision specified for --onto (%s) does not exist.",
                     onto))
        return

    start_point = onto or "HEAD"
    if branch or onto:
        yield get_status_dict(
            "run",
            ds=dset,
            # Resolve this to the full hexsha so downstream code gets a
            # predictable form.
            commit=ds_repo.get_hexsha(start_point),
            branch=branch,
            rerun_action="checkout",
            status="ok")

    def skip_or_pick(hexsha, result, msg):
        result["rerun_action"] = "skip-or-pick"
        shortrev = ds_repo.get_hexsha(hexsha, short=True)
        result["message"] = ("%s %s; %s", shortrev, msg,
                             "skipping or cherry picking")

    for res in results:
        hexsha = res["commit"]
        if "run_info" in res:
            rerun_dsid = res["run_info"].get("dsid")
            if rerun_dsid is not None and rerun_dsid != dset.id:
                skip_or_pick(hexsha, res, "was ran from a different dataset")
                res["status"] = "impossible"
            else:
                res["rerun_action"] = "run"
                res["diff"] = diff_revision(dset, hexsha)
                # This is the overriding message, if any, passed to this rerun.
                res["rerun_message"] = message
        else:
            if len(res["parents"]) > 1:
                res["rerun_action"] = "merge"
            else:
                skip_or_pick(hexsha, res, "does not have a command")
        yield res
Ejemplo n.º 59
0
def remove_color(nick):
    # nick[0] is the color escape "\x03",
    # followed by 1 or 2 digits
    return "".join(list(dropwhile(lambda x: x.isdigit(), nick[1:])))
Ejemplo n.º 60
0
    def _parse(self, ds, name=None):
        """ Parse the description, examples, and tags from a docstring. """
        lines = ds.split(os.linesep)

        def seek_past_head(ls):
            h = []
            for i, l in enumerate(ls):
                if self._is_blank(l) or self._is_tag_start(l):
                    return h, i
                h.append(l)
            else:
                return h, len(ls)

        head, non_head_index = seek_past_head(lines)
        #if not head:
        #    raise LucidocError("Empty docstring")
        head = " ".join(l.strip() for l in head)

        ls1, ls2 = tee(lines[non_head_index:])
        detail_lines = list(
            filterfalse(self._is_blank,
                        takewhile(lambda l: not self._past_desc(l), ls1)))

        desc = head
        if detail_lines:
            desc += (("\n\n" if desc else "") + "\n".join(detail_lines))
        post_desc = list(dropwhile(lambda l: not self._past_desc(l), ls2))

        raw_tag_blocks = []
        if post_desc and self._is_tag_start(post_desc[0]):
            curr_block = []
            for i, l in enumerate(post_desc):
                if self._is_blank(l):
                    first_non_tag_index = i + 1
                    break
                l = l.strip()
                if self._is_tag_start(l):
                    if curr_block:
                        raw_tag_blocks.append(curr_block)
                    curr_block = [l]
                else:
                    curr_block.append(l)
            else:
                first_non_tag_index = None
            curr_block and raw_tag_blocks.append(curr_block)
        else:
            first_non_tag_index = 0

        examples = self._parse_example_lines(
            [] if first_non_tag_index is None else
            post_desc[first_non_tag_index:])

        tags = [self._get_tag(chunk) for chunk in raw_tag_blocks]

        par, ret, err = [], [], []
        for t in tags:
            if isinstance(t, ParTag):
                par.append(t)
            elif isinstance(t, RetTag):
                ret.append(t)
            elif isinstance(t, ErrTag):
                err.append(t)
            else:
                raise TypeError("Unrecognized doc tag type: {}".format(
                    type(t)))

        if len(ret) > 1:
            raise LucidocError("Multiple ({}) returns tags: {}".format(
                len(ret), ret))
        ret = ret[0] if ret else None

        self._last_seen = ParsedDocstringResult(ds, desc, par, ret, err,
                                                examples)

        return getattr(self._last_seen, name) if name else self._last_seen