def trim_list(items):
    """Trim falsy elements from both ends of a list."""
    # Drop falsy elements from the front, then reverse and do the same for the tail.
    ltrimmed = list(itertools.dropwhile(lambda x: not x, items))
    ltrimmed.reverse()
    trimmed = list(itertools.dropwhile(lambda x: not x, ltrimmed))
    trimmed.reverse()
    return trimmed
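# Hypothetical usage sketch for trim_list (inputs made up here; assumes
# `import itertools` is in scope): falsy values are stripped from both ends
# but kept in the middle.
print(trim_list([0, '', 3, 0, 5, None, 0]))   # -> [3, 0, 5]
print(trim_list([False, None]))               # -> []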
def __init__(self, resource=None, fields=None, sheet=0, encoding=None, skip_rows=0, has_header=True, stop_empty_line=False): """Creates a XLSX spreadsheet data source stream. Attributes: * fields: `bubbles.metadata.FieldList` to use instead of auto-detection * resource: file name, URL or file-like object * sheet: sheet index number (as int) or sheet name * has_header: flag determining whether first line contains header or not. ``True`` by default. * stop_empty_line: flag to stop iteration over rows at the first encounter with an empty line. As XLSX files can contain millions or rows, this might cause very long iterations, especially if all the lines are empty past a certain point """ if isinstance(resource, openpyxl.Workbook): self.workbook = resource else: self.workbook = _load_workbook(resource) if isinstance(sheet, int): self.sheet = self.workbook.worksheets[sheet] elif isinstance(sheet, str): self.sheet = self.workbook[sheet] else: raise ArgumentError('sheet has to be a string or an integer') if has_header: self.first_row = skip_rows + 1 else: self.first_row = skip_rows self.stop_empty_line = stop_empty_line if fields: self.fields = fields else: rows = enumerate(self.sheet.rows) first_row = next(dropwhile(lambda x: x[0] < self.first_row, rows))[1] if has_header: header_rows = enumerate(self.sheet.rows) header_row = next(dropwhile(lambda x: x[0] < (self.first_row - 1), header_rows))[1] # fetch names, replace line breaks by spaces in case of # manual line wrapping names = [' '.join(str(c.value).split()) for c in header_row] else: names = ['col%d' % i for i in range(len(first_row))] self.fields = FieldList() for name, cell in zip(names, first_row): if cell.is_date: storage_type = 'date' else: storage_type = CELL_TYPES.get(cell.data_type, 'unknown') field = Field(name, storage_type=storage_type) self.fields.append(field)
def run_ogg_info(cmdpath, filename):
    """Run ogginfo.exe from `cmdpath` on `filename`. Returns OggInfo object."""
    proc = subprocess.Popen([cmdpath, filename], stdout=subprocess.PIPE,
                            universal_newlines=True)
    data = list(proc.stdout)
    metadata = {"title": None, "artist": None, "album": None,
                "tracknumber": None, "length": None}
    # Find user comments
    userdata = itertools.dropwhile(
        lambda s: not s.startswith("User comments section follows"), data)
    for line in userdata:
        for st in ["title", "artist", "album", "tracknumber"]:
            m = re.search(r"{}\s*=\s*(.*)".format(st), line)
            if m is not None:
                metadata[st] = m.group(1)
    # Find stream information
    streamdata = itertools.dropwhile(
        lambda s: not s.startswith("Vorbis stream"), data)
    for line in streamdata:
        m = re.search(r"Playback length:\s*(.*)", line)
        if m is not None:
            # Expect format "{int}m:{float}s"
            t = re.search(r"(\d+)m:([\d\.]+)s", m.group(1))
            if t is None:
                raise Exception("Unknown time code: '{}'".format(m.group(1)))
            metadata["length"] = int(t.group(1)) * 60 + math.ceil(float(t.group(2)))
    # Return
    return OggInfo(metadata["title"], metadata["artist"], metadata["album"],
                   metadata["tracknumber"], metadata["length"])
def _FillDIMMs(self, lsCfgData): """Fills RAM modules information from 'lscfg -vp' output stored in lsCfgData list""" iterDIMMsData = it.dropwhile(lambda x: not RE_RAM_MODULE.match(x), lsCfgData) dDIMMs = {} self.iDIMMs = 0 try: while True: sHWLoc, sName, sSN, sPN, iSize = ('', '', '', '', 0) # empty variables iterDIMMsData = it.dropwhile(lambda x: not RE_RAM_MODULE.match(x), iterDIMMsData) # we are at first line of disk's description. Let's parse it. # sL1 = next(iterDIMMsData).strip() next(iterDIMMsData) # oLog.debug('_FillDIMMs: 1st line is {}'.format(sL1)) self.iDIMMs += 1 sL = '--------' # initialize loop variable while sL != '': sL = next(iterDIMMsData).strip() if sL[:22] == "Hardware Location Code": sHWLoc = RE_DOTS.split(sL)[1] sName = 'RAM Module {}'.format(sHWLoc.split('.')[-1]) elif sL[:13] == "Serial Number": sSN = RE_DOTS.split(sL)[1] elif sL[:11] == "Part Number": sPN = RE_DOTS.split(sL)[1] elif sL[:6] == "Size..": iSize = int(RE_DOTS.split(sL)[1]) // 1024 else: pass # skip unknown lines # collect all the information to one data structure dDIMM_Dict = {'SN': sSN, 'PN': sPN, 'Loc': sHWLoc, 'Size': iSize} dDIMMs[sName] = dDIMM_Dict continue # while true except StopIteration: # end of lscfg output, no more DIMMs pass # now dDIMMs dictionary contains our information, but the # dictionary's key is not perfect for Zabbix item name, we need to # shorten it and remove uniqueness linked with usage of box S/N in # DIMM position. First, we need to arrange modules by boxes dDimmsByBoxes = {} for sName, dValue in dDIMMs.items(): sBoxName, sOther = sName.split('-', maxsplit=1) # if adding a first element, create a dictionary if dDimmsByBoxes.get(sBoxName, None) is None: dDimmsByBoxes[sBoxName] = {sOther: dValue} else: dDimmsByBoxes[sBoxName][sOther] = dValue # Now (hopefully) all DIMMs are grouped by a box. Just sort and number these boxes lBoxNames = list(dDimmsByBoxes.keys()) lBoxNames.sort() # <-- in place for iBoxNum in range(0, len(lBoxNames)): dInBox = dDimmsByBoxes[lBoxNames[iBoxNum]] for sOther, dValue in dInBox.items(): sName = "Box{}-{}".format(iBoxNum + 1, sOther) oDIMM = IBM_DIMM_Module(sName, dValue['PN'], dValue['SN'], dValue['Loc'], dValue['Size']) # oLog.debug('DIMM object created: ' + str(oDIMM)) self.lDIMMs.append(oDIMM) return
def move_protocol_to_wiki(session, pad_name_addon=None): if pad_name_addon: pad_name = session.pad_name.format(pad_name_addon) else: pad_name = session.pad_name pad_lines = download_pad(session.pad_team, pad_name) logg.debug("downloaded pad for %s", pad_name) #vorspann wegwerfen pad_it = dropwhile(lambda s: "= Protokoll ab hier =" not in s, pad_lines) next(pad_it) # leere Zeilen wegwerfen pad_it = dropwhile(lambda s: not s.strip(), pad_it) header = next(pad_it) match = re.search(r"(\d+)\.(\d+)\.(\d\d+)", header) if not match: raise Exception("Mit dem Protokoll-Anfang stimmt was nicht, Datum konnte nicht erkannt werden: {}".format(header)) day, month, year = [int(e) for e in match.groups()] if year < 100: year += 2000 session_date = date(year=year, month=month, day=day) reversed_date = reverse_date_format(session_date) unquote_func = lambda s: s.replace(">", ">").replace("<", "<") edit_uri = "http://wiki.piratenpartei.de/wiki//index.php?title={}&action=edit" logg.debug("Header ist:\n%s, Protokoll-Datum %s", header, reversed_date) logg.info("Inhalt:" + "-" * 80) print(header + "".join(imap(unquote_func, pad_it))) logg.info("-" * 80) logg.info("Seiten-URI fürs Protokoll:") logg.info(edit_uri.format(session.wiki_protocol_uri.format(date=reversed_date)))
def text_quote(message): # avoid importing a big module by using a simple heuristic to guess the # right encoding def decode(s, encodings=('ascii', 'utf8', 'latin1')): for encoding in encodings: try: return s.decode(encoding) except UnicodeDecodeError: pass return s.decode('ascii', 'ignore') lines = message.splitlines() # delete empty lines at beginning and end (some email client insert these # outside of the pgp signed message...) if lines[0] == '' or lines[-1] == '': from itertools import dropwhile lines = list(dropwhile(lambda l: l == '', lines)) lines = list(reversed(list(dropwhile( lambda l: l == '', reversed(lines))))) if len(lines) > 0 and lines[0] == '-----BEGIN PGP MESSAGE-----' \ and lines[-1] == '-----END PGP MESSAGE-----': try: sigs, d = crypto.decrypt_verify(message.encode('utf-8')) message = decode(d) except errors.GPGProblem: pass elif len(lines) > 0 and lines[0] == '-----BEGIN PGP SIGNED MESSAGE-----' \ and lines[-1] == '-----END PGP SIGNATURE-----': # gpgme does not seem to be able to extract the plain text part of # a signed message import gnupg gpg = gnupg.GPG() d = gpg.decrypt(message.encode('utf8')) message = d.data.decode('utf8') quote_prefix = settings.get('quote_prefix') return "\n".join([quote_prefix + line for line in message.splitlines()])
def mpairs(seq1, seq2, key1, key2=None): """Generates a tuple of matching pairs key1 and key2 are functions (getters, sort of) seq1, seq2 must be sorted in ascending order before being passed here and also each key value(which is returned by key funcs) must be UNIQUE otherwise you will see unexpected results """ key2 = key2 or key1 seq1, seq2 = iter(seq1), iter(seq2) s1, s2 = next(seq1), next(seq2) k1, k2 = key1(s1), key2(s2) while True: try: if k1 == k2: yield (s1, s2) s1, s2 = next(seq1), next(seq2) k1, k2 = key1(s1), key2(s2) elif k1 < k2: s1 = next(dropwhile(lambda x: key1(x) < k2, seq1)) k1 = key1(s1) else: s2 = next(dropwhile(lambda x: key2(x) < k1, seq2)) k2 = key2(s2) except StopIteration: break
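# Hypothetical usage sketch for mpairs (inputs invented here; assumes
# `from itertools import dropwhile` is in scope for the definition above).
# Both sequences are sorted and their key values are unique, as the docstring requires.
left = [1, 3, 5, 7]
right = [3, 4, 7, 9]
print(list(mpairs(left, right, key1=lambda x: x)))   # -> [(3, 3), (7, 7)]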
def flow(iterable, pipes, session=Session()): """Flow data through a pipeline of transforms. Takes an iterable and a list of functions ("pipes") to pass it through. The output of each pipe serves as the input to the next. The final result is just another iterable. If the pipes are generators, ``flow`` will be entirely lazy. Empty values (``None``) are valid in the pipeline. None-pipes are always skipped; their only use is as destination markers for the items finalized above. Flow for all items, finalized or not, is resumed following the none-pipe. """ if not any(pipes): for out_item in iterable: yield out_item return for in_item in iterable: if session.consume_finalized(): remaining_pipes = list(dropwhile(lambda p: p is not None, pipes)) for out_item in flow([in_item], remaining_pipes, session): yield out_item else: remaining_pipes = list(dropwhile(lambda p: p is None, pipes)) output = remaining_pipes[0](in_item, session) for out_item in flow(output, remaining_pipes[1:], session): yield out_item
def plot_probability(k, sample_size=1000): N = np.arange(1, 200) p_pair = [multi_birthday_probability(n, 2, k) for n in N] p_pair_ian = [multi_birthday_probability_ian(n, 2, k) for n in N] p_pair_experimental = [multi_birthday_probability_experimental(n, 2, k, sample_size=sample_size) for n in N] p_triple = [multi_birthday_probability(n, 3, k) for n in N] p_triple_ian = [multi_birthday_probability_ian(n, 3, k) for n in N] p_triple_experimental = [multi_birthday_probability_experimental(n, 3, k, sample_size=sample_size) for n in N] # Find the smallest n such that p >= 0.5. Assuming p is monotonically increasing with N. N_half_pair = it.dropwhile(lambda (n, p): p < 0.5, zip(N, p_pair)).next()[0] N_triple_pair = it.dropwhile(lambda (n, p): p < 0.5, zip(N, p_triple)).next()[0] P.clf() P.hold(True) P.plot(N, p_pair , 'b-', N, p_pair_ian , 'g-', N, p_pair_experimental , 'b.', N, p_triple , 'r-', N, p_triple_ian , 'g-', N, p_triple_experimental, 'r.') P.legend(['m = 2, theoretical', 'm = 2, Ian', 'm = 2, %d samples' % (sample_size,), 'm = 3, theoretical', 'm = 3, Ian', 'm = 3, %d samples' % (sample_size,)], loc='lower right') P.grid(True) P.xlabel('# People') P.ylabel('Probability') P.title('Probability of m people with the same birthday') y_limits = (-0.01, 1.01) P.plot([N_half_pair, N_half_pair], y_limits, 'k--') P.plot([N_triple_pair, N_triple_pair], y_limits, 'k--') P.ylim(y_limits) P.show()
def seq_range(seq: Iterable[int], ini: int, fin: int, key: Callable[..., int] = None) -> Iterator[int]:
    """Return a generator with the elements x in seq such that ini <= x <= fin
    (seq is assumed to be an increasing sequence of numbers)."""
    if key:
        return itertools.dropwhile(lambda x: ini > key(x),
                                   itertools.takewhile(lambda x: key(x) <= fin, seq))
    return itertools.dropwhile(lambda x: ini > x,
                               itertools.takewhile(lambda x: x <= fin, seq))
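# Hypothetical usage sketch for seq_range (inputs invented here; assumes
# `import itertools` and the typing imports used in the signature above).
print(list(seq_range(range(100), 10, 13)))   # -> [10, 11, 12, 13]
pairs = [(1, 'a'), (5, 'b'), (9, 'c')]
print(list(seq_range(pairs, 2, 8, key=lambda t: t[0])))   # -> [(5, 'b')]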
def _FillPwrSupplies(self, lsCfgData): """ Fills power supplies list from output of 'lscfg -vp' saved in a list of strings """ iterPSData = it.dropwhile(lambda x: not RE_PWRSUPPLY.match(x), lsCfgData) self.iPwrSupplies = 0 try: while True: sPN = '' # no P/N on non-local drives iterPSData = it.dropwhile(lambda x: not RE_PWRSUPPLY.match(x), iterPSData) # we are at first line of disk's description. Let's parse it. # sL1 = next(iterPSData).strip() next(iterPSData) self.iPwrSupplies += 1 # oLog.debug('_FillPwrSupply: 1st line is {}'.format(sL1)) sName = 'Power Supply {}'.format(self.iPwrSupplies) sL = '--------' # initialize loop variable while sL != '': # empty line is end of PS record sL = next(iterPSData).strip() if sL[:22] == "Hardware Location Code": sHWLoc = RE_DOTS.split(sL)[1] elif sL[:13] == "Serial Number": sSN = RE_DOTS.split(sL)[1] elif sL[:11] == "Part Number": sPN = RE_DOTS.split(sL)[1] else: pass # skip unknown lines # create PwrSupply object self.lPwrSupplies.append(IBM_Power_Supply(sName, sPN, sSN, sHWLoc)) continue # while true except StopIteration: # end of lscfg output, no more Power Supplies pass return
def print_orfs(seq,codon_table_ncbi): ''' Finds open reading frames in DNA string, including nested ORFs, and prints them, along with RNA and protein translations''' #stop_codons = [rna_to_dna(k) for k, v in codon_table.items() if v == '*'] stop_codons = codon_table_ncbi.stop_codons #start_codons = [rna_to_dna(k) for k, v in codon_table.items() if v == 'M'] start_codons = codon_table_ncbi.start_codons codon_table = codon_table_ncbi.forward_table orfs = [] # if we care about positions: #frame1 = zip(itertools.count(),(''.join(k) for k in zip(seq[0::3],seq[1::3],seq[2::3]))) #frame2 = zip(itertools.count(),(''.join(k) for k in zip(seq[1::3],seq[2::3],seq[3::3]))) #frame3 = zip(itertools.count(),(''.join(k) for k in zip(seq[2::3],seq[3::3],seq[4::3]))) def chunk3frames(frnum): '''Split up DNA sequence string into triplets, offset by frame ''' return (''.join(k) for k in zip(seq[0+frnum::3],seq[1+frnum::3],seq[2+frnum::3])) for frame in map(chunk3frames,range(3)): exhausted = False # Are there no more ORFs to find? passthrough = itertools.dropwhile(lambda l: l not in start_codons, frame) while exhausted is False: passthrough, process = itertools.tee(passthrough) result = itertools.takewhile(lambda l: l not in stop_codons, process) # this omits the stop codon new_orf = list(result) passthrough = itertools.dropwhile(lambda l: l not in start_codons, itertools.islice(passthrough,1,None)) if len(new_orf) > 0: orfs.append(new_orf) else: exhausted = True return([''.join(orf) for orf in orfs])
def stock_data_task(api_object): # checkpoint logic start_date = datetime.today() default_api = api_object.apis[0][0] checkpoint, _ = StockDataCheckpoint.objects.get_or_create( domain=api_object.domain, defaults={ "api": default_api, "date": None, "limit": 1000, "offset": 0, "location": None, "start_date": start_date, }, ) if not checkpoint.api: checkpoint.api = default_api if not checkpoint.start_date: checkpoint.start_date = start_date checkpoint.save() if not api_object.all_stock_data: facilities = api_object.test_facilities else: facilities = api_object.get_ids() if checkpoint.location: external_id = api_object.get_last_processed_location(checkpoint) if external_id: facilities = list(itertools.dropwhile(lambda x: int(x) != int(external_id), facilities)) process_facility_task(api_object, facilities[0], start_from=checkpoint.api) facilities = facilities[1:] if not checkpoint.date or checkpoint.location: # use subtasks only during initial migration facilities_chunked_list = chunked(facilities, 5) for chunk in facilities_chunked_list: api_object.process_data(process_facility_task, chunk) else: offset = checkpoint.offset for stock_api in itertools.dropwhile(lambda x: x.name != checkpoint.api, api_object.get_stock_apis_objects()): stock_api.add_date_filter(checkpoint.date, checkpoint.start_date) synchronization( stock_api, checkpoint, checkpoint.date, 1000, offset, params={"domain": api_object.domain}, domain=api_object.domain, atomic=True, ) offset = 0 checkpoint = StockDataCheckpoint.objects.get(domain=api_object.domain) save_stock_data_checkpoint(checkpoint, default_api, 1000, 0, checkpoint.start_date, None, False) checkpoint.start_date = None checkpoint.save()
def it_filter():
    # dropwhile returns an iterator yielding the elements of the input iterable
    # starting from the first one for which the predicate is false
    print list(it.dropwhile(lambda x: x > 1, iter([0, 1, 2, -1])))
    print list(it.dropwhile(lambda x: x > 1, [0, 1, 2, -1]))
    # takewhile returns an iterator yielding elements of the input iterable
    # while the predicate is true, stopping at the first false one
    print list(it.takewhile(lambda x: x > 1, iter([0, 1, 2, -1])))
    print list(it.takewhile(lambda x: x > 1, iter([2, 1, 2, -1])))
def get_bounding_elements(x, l, key=lambda elem: elem, sort=False): """ Get the two elements of a list that bound a value """ if sort: l = sorted(l, key=key) return (next(itertools.dropwhile(lambda elem: key(elem) > x, reversed(l)), None), next(itertools.dropwhile(lambda elem: key(elem) < x, l), None))
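# Hypothetical usage sketch for get_bounding_elements on an ascending list
# (inputs invented here; assumes `import itertools`); a side with no bound
# comes back as None.
print(get_bounding_elements(4, [1, 3, 5, 7]))   # -> (3, 5)
print(get_bounding_elements(0, [1, 3, 5, 7]))   # -> (None, 1)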
def parse_docstring(docstring):
    # Materialize the stripped lines so they can be reused below (a plain
    # map() iterator would already be exhausted by the first list() call).
    lines = [line.strip() for line in docstring.split("\n")]
    definitions = list(itertools.dropwhile(lambda x: x, lines))
    summaries = itertools.dropwhile(lambda x: not x, definitions)
    summaries = itertools.takewhile(lambda x: x, summaries)
    summaries = list(summaries)
    summaries = summaries if summaries else list(lines)
    return definitions, summaries
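# Hypothetical usage sketch for parse_docstring (input invented here; assumes
# `import itertools`): `definitions` is everything from the first blank line
# onward, and `summaries` is the first non-blank paragraph after that blank line.
doc = """First line of the docstring.

Paragraph after the blank line.
Second line of that paragraph.
"""
definitions, summaries = parse_docstring(doc)
print(summaries)   # -> ['Paragraph after the blank line.', 'Second line of that paragraph.']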
def __init__(self, muon_track, muon_p, muon_losses, frame): # Create loss tuples self.losses = sorted([[loss.energy, (loss.pos - muon_p.pos)*muon_p.dir, int(loss.type)] for loss in muon_losses], key=lambda x: x[1]) # Create checkpoints self.checkpoints = [(muon_p.energy, 0)] muon_pos_i = dataclasses.I3Position(muon_track.xi, muon_track.yi, muon_track.zi) self.checkpoints.append((muon_track.Ei, (muon_pos_i - muon_p.pos)*muon_p.dir)) muon_pos_c = dataclasses.I3Position(muon_track.xc, muon_track.yc, muon_track.zc) self.checkpoints.append((muon_track.Ec, (muon_pos_c - muon_p.pos)*muon_p.dir)) muon_pos_f = dataclasses.I3Position(muon_track.xf, muon_track.yf, muon_track.zf) self.checkpoints.append((muon_track.Ef, (muon_pos_f - muon_p.pos)*muon_p.dir)) self.checkpoints.append((0, muon_p.length)) # Assign valid checkpoints track_cps = self.checkpoints[1:-1] self.valid_checkpoints = [self.checkpoints[0]] + [cp for cp in track_cps if cp[0] > 0] + [self.checkpoints[-1]] self.valid_checkpoints = sorted(self.valid_checkpoints, key=lambda x: x[1]) # Add loss sums to losses next_dist = 0 total = 0 for j in xrange(len(self.losses)): if self.losses[j][1] >= next_dist: next_dist = next(itertools.dropwhile(lambda cp: cp[1] <= self.losses[j][1], self.valid_checkpoints), (None, np.inf))[1] total = 0 total += self.losses[j][0] self.losses[j] = tuple(self.losses[j] + [total]) self.loss_rates = [] self.loss_ranges = [] for i in xrange(0, len(self.valid_checkpoints)-1): cp1 = self.valid_checkpoints[i] cp2 = self.valid_checkpoints[i+1] first_index = next(itertools.dropwhile(lambda l: l[1][1] <= cp1[1], enumerate(self.losses)), [-1])[0] last_index = len(self.losses) - 1 - next(itertools.dropwhile(lambda l: l[1][1] >= cp2[1], enumerate(reversed(self.losses))), [0])[0] if last_index < 0: total_stochastic_loss = 0 else: total_stochastic_loss = self.losses[last_index][3] try: loss_rate = (cp1[0] - cp2[0] - total_stochastic_loss) / (cp2[1] - cp1[1]) except: print self.checkpoints print self.valid_checkpoints print 'i: %d' % i print muon_p print frame['I3EventHeader'] print frame['I3EventHeader'].run_id print frame['I3EventHeader'].event_id raise self.loss_rates.append(loss_rate) self.loss_ranges.append((first_index, last_index+1))
def _load(self): is_header = lambda line: not line.startswith('<') parser = OFXParser(self, accounts_only=True) for line in dropwhile(is_header, self.lines): parser.feed(line) parser.close() parser = OFXParser(self) for line in dropwhile(is_header, self.lines): parser.feed(line) parser.close()
def regex_chunk(lines, regex): # type: (List[str], Pattern[str]) -> List[List[str]] lst = list(itertools.dropwhile(lambda x: not regex.match(x), lines)) arr = [] while lst: ret = [lst[0]]+list(itertools.takewhile(lambda x: not regex.match(x), lst[1:])) arr.append(ret) lst = list(itertools.dropwhile(lambda x: not regex.match(x), lst[1:])) return arr
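# Hypothetical usage sketch for regex_chunk (inputs invented here; assumes
# `import itertools` and `import re`): lines before the first match are
# discarded, and each chunk starts at a matching line.
lines = ['noise', '== a ==', 'a1', 'a2', '== b ==', 'b1']
print(regex_chunk(lines, re.compile(r'^== ')))
# -> [['== a ==', 'a1', 'a2'], ['== b ==', 'b1']]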
def strip_rows(lines): ''' returns an iterator of lines with leading and trailing blank cells removed ''' isblank = lambda s: s == '' for line in lines: leading_dropped = list(itertools.dropwhile(isblank, line)) rev_line = list(itertools.dropwhile(isblank, reversed(leading_dropped))) yield list(reversed(rev_line))
def trianglepentagonhexagons(): triang_iter = triangles() pentag_iter = pentagons() for hexag in hexagons(): for pentag in dropwhile(lambda p: p < hexag, pentag_iter): if pentag == hexag: for triang in dropwhile(lambda t: t < hexag,triang_iter): if triang == hexag: yield hexag break break
def remove_blocks(name, iterable): start, end = BLOCK_START % name, BLOCK_END % name it = iter(iterable) while True: line = next(it) while line != start: yield line line = next(it) it = tail(itertools.dropwhile(not_eq(end), it)) if remove_empty_next: it = itertools.dropwhile(lambda el: not el.strip(), it)
def accuDetail(correct, total, legend, ylim = 100, treshold=1000) : get_index = lambda cs : list(dropwhile(lambda (i,c) : sum(c) < treshold,enumerate(cs)))[0][0] indices = [get_index(cs) for cs in correct] print(indices) for c,t,l,i in zip(correct, total, legend, indices) : print("%s:\t%i of %i\t(%.2f%%)" % (l, sum(c[i]), sum(t[i]), 100*sum(c[i])/float(sum(t[i])))) get_accu = lambda ts,cs,index : [1 if t == 0 else c/float(t) for t,c in zip(ts[index], cs[index])] accu = [get_accu(ts, cs, index) for ts,cs,index in zip(total, correct, indices)] accuHist(accu, legend, ylim=ylim)
def strip_rows(lines): """ returns an iterator of lines with leading and trailing blank (empty or which contain only space) cells. """ isblank = lambda s: s == '' or s.isspace() for line in lines: leading_dropped = list(itertools.dropwhile(isblank, line)) rev_line = list(itertools.dropwhile(isblank, reversed(leading_dropped))) yield list(reversed(rev_line))
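# Hypothetical usage sketch for strip_rows (rows invented here; assumes
# `import itertools`): blank cells are stripped from both ends of each row,
# while interior blanks are kept.
rows = [['', ' ', 'a', '', 'b', ''], ['x', 'y']]
print(list(strip_rows(rows)))   # -> [['a', '', 'b'], ['x', 'y']]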
def stencil(**kwargs): """ Applying genotype calls to multi-way alignment incidence matrix :param alnfile: alignment incidence file (h5), :param gtypefile: genotype calls by GBRS (tsv), :param grpfile: gene ID to isoform ID mapping info (tsv) :return: genotyped version of alignment incidence file (h5) """ alnfile = kwargs.get('alnfile') gtypefile = kwargs.get('gtypefile') grpfile = kwargs.get('grpfile') if grpfile is None: grpfile2chk = os.path.join(DATA_DIR, 'ref.gene2transcripts.tsv') if os.path.exists(grpfile2chk): grpfile = grpfile2chk else: print >> sys.stderr, '[gbrs::stencil] A group file is *not* given. Genotype will be stenciled as is.' # Load alignment incidence matrix ('alnfile' is assumed to be in multiway transcriptome) alnmat = emase.AlignmentPropertyMatrix(h5file=alnfile, grpfile=grpfile) # Load genotype calls hid = dict(zip(alnmat.hname, np.arange(alnmat.num_haplotypes))) gid = dict(zip(alnmat.gname, np.arange(len(alnmat.gname)))) gtmask = np.zeros((alnmat.num_haplotypes, alnmat.num_loci)) gtcall_g = dict.fromkeys(alnmat.gname) with open(gtypefile) as fh: if grpfile is not None: gtcall_t = dict.fromkeys(alnmat.lname) for curline in dropwhile(is_comment, fh): item = curline.rstrip().split("\t") g, gt = item[:2] gtcall_g[g] = gt hid2set = np.array([hid[c] for c in gt]) tid2set = np.array(alnmat.groups[gid[g]]) gtmask[np.meshgrid(hid2set, tid2set)] = 1.0 for t in tid2set: gtcall_t[alnmat.lname[t]] = gt else: for curline in dropwhile(is_comment, fh): item = curline.rstrip().split("\t") g, gt = item[:2] gtcall_g[g] = gt hid2set = np.array([hid[c] for c in gt]) gtmask[np.meshgrid(hid2set, gid[g])] = 1.0 alnmat.multiply(gtmask, axis=2) for h in xrange(alnmat.num_haplotypes): alnmat.data[h].eliminate_zeros() outfile = kwargs.get('outfile') if outfile is None: outfile = 'gbrs.stenciled.' + os.path.basename(alnfile) alnmat.save(h5file=outfile)
def normalise_text(text):
    """
    Removes leading and trailing whitespace from each line of text.
    Removes leading and trailing blank lines from text.
    """
    stripped_lines = [line.strip() for line in text.split("\n")]
    # remove leading and trailing empty lines
    stripped_head = list(itertools.dropwhile(lambda s: not s, stripped_lines))
    stripped_tail = itertools.dropwhile(lambda s: not s, reversed(stripped_head))
    return "\n".join(reversed(list(stripped_tail)))
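# Hypothetical usage sketch for normalise_text (input invented here; assumes
# `import itertools`): per-line whitespace and the surrounding blank lines go,
# interior blank lines stay.
print(repr(normalise_text("\n\n  hello  \n\n  world  \n\n")))   # -> 'hello\n\nworld'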
def check_file_neon(fn, source): lines = enumerate(source.split("\n"), 1) for incomment, doc in itertools.groupby(lines, lambda line: len(line[1]) >= 2 and line[1][1] == "|"): if incomment: full = list(doc) doc = itertools.dropwhile(lambda x: "Example:" not in x, [x[1] for x in full]) try: next(doc) test("IMPORT {}\n".format(os.path.basename(fn)[:-5]) + "\n".join(re.sub(r"^ \|\s*[>:|]", "", x) for x in doc if x.startswith(" | "))) except StopIteration: firstline = next(itertools.dropwhile(lambda x: not x[1][3:].strip(), full)) undocumented.append("no example in {}:{} for {}".format(fn, firstline[0], firstline[1][3:].strip()))
def dfs_singles(r, path, a, b, depth=0, debug=False): '''A leaf of the depth-first search in combinations of large primes: depth-first search within ''singles'' = element of the ''unknown'' array.''' if abs(r) < 1e-12: yield path elif r > 0: i_min, i_max = 0, len(a) try: i_min = it.dropwhile(lambda (_, x): 1 - 1e-10 > r * x * x, enumerate(a)).next()[0] except StopIteration: i_min = len(a) try: i_max = it.dropwhile(lambda (_, x): x > r - 1e-10, enumerate(b)).next()[0] except StopIteration: i_max = len(a) for i in xrange(i_min, i_max): for p in dfs_singles(r - 1. / (a[i] * a[i]), path | set([a[i]]), a[i + 1:], b[i + 1:], debug=debug): yield p
def problem49(): primes = primesUpTo(10000) for a in dropwhile(lambda x: x <= 1487, primes): # the bound comes from wanting c = 2b - a ≤ 10000 for b in dropwhile(lambda x: x <= a, primes): if b >= (10000 + a) / 2: break c = b + (b - a) # Do the same digits first since I think that is the most time # consuming part if sameDigits(a, b) and sameDigits(b, c) and isPrime(c): return int(str(a) + str(b) + str(c))
def populate_report_data(start_date, end_date, domain, runner, locations=None, strict=True): # first populate all the warehouse tables for all facilities # hard coded to know this is the first date with data start_date = max(start_date, default_start_date()) # For QA purposes generate reporting data for only some small part of data. if not ILSGatewayConfig.for_domain(domain).all_stock_data: if locations is None: locations = _get_test_locations(domain) facilities = filter(lambda location: location.location_type == 'FACILITY', locations) non_facilities_types = ['DISTRICT', 'REGION', 'MSDZONE', 'MOHSW'] non_facilities = [] for location_type in non_facilities_types: non_facilities.extend(filter(lambda location: location.location_type == location_type, locations)) else: facilities = Location.filter_by_type(domain, 'FACILITY') non_facilities = list(Location.filter_by_type(domain, 'DISTRICT')) non_facilities += list(Location.filter_by_type(domain, 'REGION')) non_facilities += list(Location.filter_by_type(domain, 'MSDZONE')) non_facilities += list(Location.filter_by_type(domain, 'MOHSW')) if runner.location: if runner.location.location_type.name.upper() != 'FACILITY': facilities = [] non_facilities = itertools.dropwhile( lambda location: location._id != runner.location.location_id, non_facilities ) else: facilities = itertools.dropwhile( lambda location: location._id != runner.location.location_id, facilities ) facilities_chunked_list = chunked(facilities, 5) for chunk in facilities_chunked_list: res = chain(process_facility_warehouse_data.si(fac, start_date, end_date, runner) for fac in chunk)() res.get() non_facilities_chunked_list = chunked(non_facilities, 50) # then populate everything above a facility off a warehouse table for chunk in non_facilities_chunked_list: res = chain( process_non_facility_warehouse_data.si(org, start_date, end_date, runner, strict) for org in chunk )() res.get() runner.location = None runner.save() # finally go back through the history and initialize empty data for any # newly created facilities update_historical_data(domain)
def insta_posts_py(query, scope, max_posts, scrape_comments, save_path="", since="", until=""): """ Run custom search Fetches data from Instagram via instaloader. """ # this is useful to include in the results because researchers are # always thirsty for them hashtags hashtag = re.compile(r"#([^\s,.+=-]+)") mention = re.compile(r"@([a-zA-Z0-9_]+)") queries = query.split(",") if since != "" and until != "": since = since.split("-") until = until.split("-") for item in range(len(since)): since[item] = int(since[item]) for item in range(len(until)): until[item] = int(until[item]) since = datetime(since[0], since[1], since[2]) until = datetime(until[0], until[1], until[2]) # return queries posts = [] # for each query, get items for query in queries: chunk_size = 0 print("Retrieving posts ('%s')" % query) try: if scope == "hashtag": query = query.replace("#", "") hashtag_obj = instaloader.Hashtag.from_name( instagram.context, query) chunk = hashtag_obj.get_posts() elif scope == "username": query = query.replace("@", "") profile = instaloader.Profile.from_username( instagram.context, query) chunk = profile.get_posts() else: print("Invalid search scope for instagram scraper: %s" % repr(scope)) return [] # "chunk" is a generator so actually retrieve the posts next posts_processed = 0 # go through posts, and retrieve comments results = [] results_posts = [] for post in chunk: chunk_size += 1 print("Retrieving posts ('%s', %i posts)" % (query, chunk_size)) if posts_processed >= max_posts: break try: posts.append(chunk.__next__()) posts_processed += 1 comments_bit = " and comments" if scrape_comments == True else "" if since != "" and until != "": posts = takewhile( lambda p: p.date > until, dropwhile(lambda p: p.date > since, posts)) for post in posts: print("Retrieving metadata%s for post %i" % (comments_bit, posts_processed)) thread_id = post.shortcode try: results_posts.append({ "id": str(thread_id), "thread_id": str(thread_id), "parent_id": str(thread_id), "body": post.caption if post.caption is not None else "", "author": post.owner_username, "timestamp": post.date_utc.timestamp(), "type": "video" if post.is_video else "picture", "url": post.video_url if post.is_video else post.url, "thumbnail_url": post.url, "hashtags": ",".join(post.caption_hashtags), "usertags": ",".join(post.tagged_users), "mentioned": ",".join( mention.findall(post.caption) if post. 
caption else ""), "num_likes": post.likes, "num_comments": post.comments, "level": "post", "query": query }) except (instaloader.QueryReturnedNotFoundException, instaloader.ConnectionException): pass if not scrape_comments == True: if save_path != "": save_csv(save_path, results_posts) results.append(results_posts) continue if (posts_processed % 10 == 0): wait_time = randint(300, 500) print("Wating for " + str(wait_time) + " seconds.") sleep(wait_time) else: wait_time = randint(20, 30) print("Wating for " + str(wait_time) + " seconds.") sleep(wait_time) try: for comment in post.get_comments(): answers = [ answer for answer in comment.answers ] try: results_posts.append({ "id": str(comment.id), "thread_id": str(thread_id), "parent_id": str(thread_id), "body": comment.text, "author": comment.owner.username, "timestamp": comment.created_at_utc.timestamp(), "type": "comment", "url": "", "hashtags": ",".join(hashtag.findall( comment.text)), "usertags": "", "mentioned": ",".join(mention.findall( comment.text)), "num_likes": comment.likes_count if hasattr( comment, "likes_count") else 0, "num_comments": len(answers), "level": "comment", "query": query }) except instaloader.QueryReturnedNotFoundException: pass # instagram only has one reply depth level at the time of # writing, represented here for answer in answers: try: results_posts.append({ "id": str(answer.id), "thread_id": str(thread_id), "parent_id": str(comment.id), "body": answer.text, "author": answer.owner.username, "timestamp": answer.created_at_utc.timestamp(), "type": "comment", "url": "", "hashtags": ",".join( hashtag.findall(answer.text)), "usertags": "", "mentioned": ",".join( mention.findall(answer.text)), "num_likes": answer.likes_count if hasattr( answer, "likes_count") else 0, "num_comments": 0, "level": "answer", "query": query }) except instaloader.QueryReturnedNotFoundException: pass except instaloader.QueryReturnedNotFoundException: pass if save_path != "": save_csv(save_path, results_posts) except instaloader.QueryReturnedNotFoundException: pass except (instaloader.QueryReturnedNotFoundException, instaloader.ConnectionException): # data not available...? this happens sometimes, not clear why pass results.append(results_posts) return results
def burn_blanks(c): return list(dropwhile(self._is_blank, c))
def __call__(self, iterator): return itertools.dropwhile(self.function, iterator)
l_1 = [1, 2, 3, 4, 5, 6, 7, 8] print('List: ', l_1) print('Filtered Elements greater than 4: ', list(filter(lambda x: x > 4, l_1))) print( '\n------------------------------ Compress Filter ------------------------------' ) l_2 = ['a', 'b', 'c', 'd', 'e'] selectors = [True, False, 1, 0] print(f'Using Compress Filter on: {l_2}\nUsing Selector: {selectors}') # 1 to 1 mapping, like zip: a -> True, b -> False, c -> 1, d -> 0, e -> None print('Compress Filtered: ', list(itertools.compress(l_2, selectors))) print( '\n------------------------------ Takewhile Filter ------------------------------' ) l_3 = [1, 3, 5, 2, 0] print('List: ', l_3) print('Takewhile for (< 5): ', list(itertools.takewhile(lambda x: x < 5, l_3))) print( '\n------------------------------ Dropwhile Filter ------------------------------' ) l_4 = [1, 3, 5, 2, 0] print('List: ', l_4) print('Dropwhile for (< 5): ', list(itertools.dropwhile(lambda x: x < 5, l_4)))
def __init__(self, experiment_config_name, extra_dict={}, config_roots=[''], getexp=False): '''Read experiment config to get basic settings TODO: probably nicer if default experiment is given as argument ''' # State variables self.version_info_missing = False # # Helper functions # def split_jobs(config): '''Post-process job definition to allow for shared configs as [[job1, job2]]''' if 'jobs' in config: sep = re.compile(r'\s*,\s*') for subjobs, subconfig in config['jobs'].iteritems(): if re.search(sep, subjobs): for subjob in re.split(sep, subjobs): if subjob in config['jobs']: config['jobs'][subjob].merge(subconfig.dict()) else: config['jobs'][subjob] = subconfig.dict() del config['jobs'][subjobs] def get_config_name(lib_name, base_name): '''Cycle through config path until a match is found. Return simple path otherwise''' config_name = os.path.join(lib_name, base_name) for config_root in config_roots: tentative_name = os.path.join(config_root, config_name) if os.path.exists(tentative_name): config_name = tentative_name break return config_name def read_value(value): if os.path.exists(value): stream = open(value) result = stream.read().strip() stream.close() else: result = '' return result def sec2time(seconds): '''Create time string (HH:MM:SS) from second of day''' seconds = int(seconds) if seconds >= 86400: raise ValueError("invalid second of day '{0}'".format(seconds)) minutes, s = divmod(seconds, 60) h, m = divmod(minutes, 60) return "{0:02}:{1:02}:{2:02}".format(h, m, s) def split_date(value): '''Re-format datetime string to list for use in namelists''' match = re.match(r'^0*(\d+)-0*(\d+)-0*(\d+)' r'([T ]0*(\d+)(:0*(\d+)(:0*(\d+))?)?)?$', value) if match: return [match.groups('0')[i] for i in [0,1,2,4,6,8]] match = re.match(r'^0*(\d+?)(\d{2})(\d{2})' r'([T ]0*(\d+)(:0*(\d+)(:0*(\d+))?)?)?$', value) if match: return [match.groups('0')[i] for i in [0,1,2,4,6,8]] raise ValueError("invalid date/time '{0}'".format(value)) def add_years(value, years): '''Add specified number of years (possible negative) to date''' years = int(years) dt = map(int, split_date(value)) dt[0] += years return "{0:+05}-{1:02}-{2:02}".format(*dt).lstrip('+') def add_days(value, days): '''Add specified number of days (possible negative) to date''' def leap(year): return (not year % 4) and (not (not year % 100) or (not year % 400)) def monlen(year, mon): monlens = (0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 0) return monlens[mon] + (mon == 2 and leap(year)) def add_days_(year, mon, day, days): while True: if mon == 0: year -= 1 mon = 12 day = monlen(year, 12) continue if mon == 13: year += 1 mon = 1 day = 1 continue if day + days <= 0: days += day mon -= 1 day = monlen(year, mon) continue if day + days > monlen(year, mon): days -= monlen(year, mon) - day + 1 mon += 1 day = 1 continue day += days break return (year, mon, day) days = int(days) dt = map(int, split_date(value)) dt = add_days_(dt[0], dt[1], dt[2], days) return "{0:+05}-{1:02}-{2:02}".format(*dt).lstrip('+') def eval_value(value): ''' Evaluate key as python expression, return as string or sequence of strings. ''' result = eval(value) if isinstance(result, (list, tuple)): result = map(str, result) else: result = str(result) return result def eval_value_string(value): ''' Evaluate key as python expression, return as string or sequence of strings. ''' result = eval_value(value) if isinstance(result, (list, tuple)): result = ", ".join(result) return result def eval_expression(value): ''' Check if value is a supported expression. 
If so, evaluate and return result, otherwise just pass through. ''' match = re.match(r'^eval\((.*)\)$', value, re.S) if match: return eval_value(match.group(1)) match = re.match(r'^evals\((.*)\)$', value, re.S) if match: return eval_value_string(match.group(1)) match = re.match(r'^add_(years|days)\(\s*([-\d]+([T ][\d:]+)?)\s*,\s*([-+]?\d+)\s*\)$', value, re.S) if match: if match.group(1) == 'days': return add_days(match.group(2), match.group(4)) return add_years(match.group(2), match.group(4)) match = re.match(r'^split_date\((.*)\)$', value, re.S) if match: return split_date(match.group(1)) match = re.match(r'^sec2time\((.*)\)$', value, re.S) if match: return sec2time(match.group(1)) match = re.match(r'^read\((.*)\)$', value, re.S) if match: return read_value(match.group(1)) return value # Interpolate and evaluate keys if they are an expression def eval_key(section, key): try: value = section[key] if isinstance(value, (list, tuple)): value = map(eval_expression, value) elif isinstance(value, basestring): value = eval_expression(value) if isinstance(value, (list, tuple)): value = [v.replace('$', '$$') for v in value] elif isinstance(value, basestring): value = value.replace('$', '$$') except (InterpolationError, ValueError) as error: raise ExpConfigError(error.message, key) section[key] = value # Undo remaining changes from walk with eval_key def uneval_key(section, key): try: value = section[key] if isinstance(value, (list, tuple)): value = [v.replace('$$', '$') for v in value] elif isinstance(value, basestring): value = value.replace('$$', '$') except (InterpolationError, ValueError) as error: raise ExpConfigError(error.message, key) section[key] = value # Move version info from local config to global list def register_version(pre_config, config_versions): if 'VERSION_' in pre_config: config_versions.append(pre_config['VERSION_']) del pre_config['VERSION_'] else: self.version_info_missing = True # # Method body # # Pre-read basic experiment settings pre_config = None setup_config_name = get_config_name('', ExpConfig.setup_config_name) if os.path.exists(setup_config_name): pre_config = ConfigObj(setup_config_name, interpolation=False) user_config = ConfigObj(experiment_config_name, interpolation=False) if pre_config: pre_config.merge(user_config) else: pre_config = user_config experiment_type = extra_dict.get('EXP_TYPE', pre_config['EXP_TYPE']) # Empty environment should load default environment = extra_dict.get('ENVIRONMENT', pre_config.get('ENVIRONMENT', ExpConfig.default_name)) # Options should always be treated as a list setup_options = extra_dict.get('SETUP_OPTIONS', pre_config.get('SETUP_OPTIONS', '')) if isinstance(setup_options, basestring): if setup_options: setup_options = [setup_options] else: setup_options = [] exp_options = extra_dict.get('EXP_OPTIONS', pre_config.get('EXP_OPTIONS', '')) if isinstance(exp_options, basestring): if exp_options: exp_options = [exp_options] else: exp_options = [] options = setup_options + exp_options # Backwards compatibility ENVIRONMENT -> QUEUE_TYPE if environment == ExpConfig.default_name and 'QUEUE_TYPE' in pre_config: feedback.warning("found obsolete keyword 'QUEUE_TYPE'; " "should be replaced by 'ENVIRONMENT'") environment = pre_config['QUEUE_TYPE'] # Load default if environment was deliberately set to empty if not environment: environment = ExpConfig.default_name pre_config = None user_config = None # Start from empty configuration pre_config = ConfigObj(interpolation=False) config_versions = [] # Get default experiment id from file name 
pre_config[ExpConfig.id_name] = os.path.splitext( os.path.basename(experiment_config_name) )[0] # Read Environment env_dict = dict(os.environ) if not getexp: # Mask literal dollar characters for key, value in env_dict.iteritems(): env_dict[key] = value.replace('$', '$$') pre_config.merge({'DEFAULT': {}}) for key, value in sorted(env_dict.iteritems()): pre_config['DEFAULT'][key] = value # Read experiment settings from library (default and type specific) lib_config_name = get_config_name(ExpConfig.exp_lib_dir, ExpConfig.default_name+'.config') pre_config.merge(ConfigObj(lib_config_name, interpolation=False)) split_jobs(pre_config) register_version(pre_config, config_versions) if os.path.exists(setup_config_name): pre_config.merge(ConfigObj(setup_config_name, interpolation=False)) split_jobs(pre_config) register_version(pre_config, config_versions) lib_config_name = get_config_name(ExpConfig.exp_lib_dir, experiment_type+'.config') if os.path.exists(lib_config_name): pre_config.merge(ConfigObj(lib_config_name, interpolation=False)) split_jobs(pre_config) register_version(pre_config, config_versions) else: feedback.warning("cannot find experiment config for '%s', "+ "using default only", experiment_type) for option in options: lib_config_name = get_config_name(ExpConfig.opt_lib_dir, option+'.config') if os.path.exists(lib_config_name): pre_config.merge(ConfigObj(lib_config_name, interpolation=False)) split_jobs(pre_config) register_version(pre_config, config_versions) else: feedback.warning("cannot find config for option '%s', using "+ "default/experiment type only", option) # Read host environment settings from library lib_config_name = get_config_name(ExpConfig.env_lib_dir, environment+'.config') if os.path.exists(lib_config_name): pre_config.merge(ConfigObj(lib_config_name, interpolation=False)) register_version(pre_config, config_versions) # Warn user if at least one config had no version info if self.version_info_missing: feedback.info("version info for standard config is incomplete") # Re-read config to allow overriding default settings # TODO: probably nicer if default experiment is given as argument experiment_config = ConfigObj(experiment_config_name, interpolation=False) pre_config.merge(experiment_config) split_jobs(pre_config) # Add extra dictionary pre_config.merge(extra_dict) # Backwards compatibility ENVIRONMENT -> QUEUE_TYPE pre_config['ENVIRONMENT'] = environment # Add complete versioning info if not getexp: pre_config['VERSIONS_'] = config_versions # Re-read merged config with interpolation set. # This works around incomprehensible inheritance of interpolation with # merge. Make sure that all values are interpolated config_lines = StringIO.StringIO() pre_config.write(config_lines) pre_config = None config_lines.seek(0) pre_config = ConfigObj(config_lines, interpolation=False if getexp else 'template') # Extract experiment description from initial comment # if not set explicitly if not pre_config.has_key('EXP_DESCRIPTION'): is_empty = lambda s: re.match(r'^[\s#]*$', s) rm_comment = lambda s: re.sub(r'^\s*# ?', '', s) pre_config['EXP_DESCRIPTION'] = "\n".join( reversed(list( dropwhile(is_empty, reversed(list( dropwhile(is_empty, map(rm_comment, experiment_config.initial_comment) ) )) ) )) ) pre_config.walk(eval_key) # Re-read final config without interpolation. # This allows copying data without evaluation of version keywords. 
config_lines.seek(0) config_lines.truncate() pre_config.write(config_lines) pre_config = None config_lines.seek(0) ConfigObj.__init__(self, config_lines, interpolation=False) self.walk(uneval_key) self.experiment_id = self[ExpConfig.id_name] self.experiment_kind = re.sub(r'-\w+$', '', experiment_type)
def drop_until(iterable, low): for i in iterable: if i < low: continue yield i def take_until(iterable, high): for i in iterable: if i > high: break yield i def take_between_v2(iterable, low, high): for i in take_until(drop_until(iterable, low), high): yield i for i in take_between_v2(take_even(fibonacci()), 100, 1000): print(i) print( list( itertools.takewhile( lambda i: i <= 1000, itertools.dropwhile(lambda i: i < 100, filter(lambda i: i % 2 == 0, fibonacci())), )))
def hash_test_code(main_path): """Hashes file main_path.""" with open(main_path) as main: test_code_hash = hashlib.sha256() for line in main: test_code_hash.update(line.encode()) return test_code_hash.hexdigest() PROFESSOR_TEST_CODE_HEXDIGEST = '22c0d504a3335886a369d75f72f07474b1d10599c294b1b45770e9ffdbc43b95' PROFESSOR_CHIFFRE_HEXDIGEST = '60ff41b09e4e1011d3a5f33704ec53df319a248d1de48250a131b809a85cb2db' PROFESSOR_CLAIR_HEXDIGEST = '4ef57703aad7ffd9f3129bb46c81a15308f1963e1f12ab00718f3569fde090f3' CALLBACKS = pygit2.RemoteCallbacks(credentials=pygit2.KeypairFromAgent("git")) with open('depots.txt') as remote_depot_names: for remote_depot_name in itertools.dropwhile( lambda line: line.startswith('#'), remote_depot_names): try: # Craft URL to clone given a depot name. remote_depot_name = remote_depot_name.rstrip() remote_depot_url = 'ssh://[email protected]/' + remote_depot_name + '.git' local_depot_path = remote_depot_name.replace('/', '-') print(local_depot_path, end=' ') # Clone the repo. if pygit2.clone_repository(remote_depot_url, local_depot_path, callbacks=CALLBACKS) \ is None: raise RuntimeError('-1') # Confirm test code is intact. if hash_test_code(local_depot_path + '/test/main.c') != PROFESSOR_TEST_CODE_HEXDIGEST or \ hash_test_code(local_depot_path + '/test/chiffre.txt') != PROFESSOR_CHIFFRE_HEXDIGEST or \
def skip(iterable, N):
    """Skip the first N records; yields (index, record) pairs from enumerate()."""
    from itertools import dropwhile
    return dropwhile(lambda n_rec: n_rec[0] < N, enumerate(iterable))
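# Hypothetical usage sketch for skip (input invented here): note that it yields
# (index, item) pairs, not bare items, because the records stay wrapped by enumerate().
print(list(skip('abcde', 2)))   # -> [(2, 'c'), (3, 'd'), (4, 'e')]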
assert next(a) == 'b' assert next(a) == 'c' assert next(a) == 'a' a = itertools.cycle(range(3)) assert next(a) == 0 assert next(a) == 1 assert next(a) == 2 assert next(a) == 0 assert next(a) == 1 assert next(a) == 2 ######################### ## Tests for dropwhile ## ######################### a = itertools.dropwhile(lambda x: x < 5, [1, 4, 6, 4, 1]) assert next(a) == 6 assert next(a) == 4 assert next(a) == 1 a = itertools.dropwhile(lambda x: x == 'p', 'pbrython') assert list(a) == ['b', 'r', 'y', 't', 'h', 'o', 'n'] ########################### ## Tests for filterfalse ## ########################### a = itertools.filterfalse(lambda x: x % 2, range(10)) assert next(a) == 0 assert next(a) == 2 assert next(a) == 4
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time   : 2021/10/19 10:57 AM
# @Author : liuzh
# @desc   :
from itertools import dropwhile

# Skip the comment lines at the start of the file
with open('4.8.txt') as f:
    for line in dropwhile(lambda line: line.startswith('#'), f):
        print(line, end='')

# If the number of lines to skip is already known, use islice instead: [3:]
from itertools import islice
items = ['a', 'b', 'c', 1, 4, 10, 15]
# equivalent to [3:]
for x in islice(items, 3, None):
    print(x)
# swapping None and 3 gives [:3]
for x in islice(items, None, 3):
    print(x)

# The original, manual approach
with open('4.8.txt') as f:
    # Skip over initial comments
    while True:
        line = next(f, '')
        if not line.startswith('#'):
            break
glbs.MinTumorDepth = 20 glbs.MinNormalDepth = 8 glbs.MultipleTumorNormalFreq = 5 glbs.MinAD = 5 glbs.MinAltFreq = 0.01 fin = open(args.maf) ofh = open(args.output, "wt") comments = ifilter(lambda L: L.startswith("#"), fin) for line in comments: ofh.write(line) fin.seek(0) start = dropwhile(lambda L: L.lower().lstrip().startswith('#'), fin) cin = csv.DictReader(start, delimiter="\t") fnames = cin.fieldnames fnames.append("Caller") cout = csv.DictWriter(ofh, fieldnames=fnames, delimiter="\t") cout.writeheader() for recDict in cin: try: recDict["Caller"] = args.caller rec = Struct(**recDict) ## For both somatic and not somatic if rec.FILTER and rec.FILTER.find("LowQual") > -1: continue
def vowel(c):
    return c.lower() in 'aeiou'

print('{:*^60}'.format('filter()'))
print(list(filter(vowel, 'Aardvark')))
print(list(filter(lambda c: c.lower() in 'aeiou', 'Aardvark')))

import itertools
print('{:*^60}'.format('itertools.filterfalse()'))
print(list(itertools.filterfalse(vowel, 'Aardvark')))
print('{:*^60}'.format('itertools.dropwhile()'))
print(list(itertools.dropwhile(vowel, 'Aardvark')))
print('{:*^60}'.format('itertools.takewhile()'))
print(list(itertools.takewhile(vowel, 'Aardvark')))
print('{:*^60}'.format('itertools.compress()'))
print(list(itertools.compress('Aardvark', (1, 0, 1, 1, 0, 1))))
print('{:*^60}'.format('itertools.islice(, 4)'))
print(list(itertools.islice('Aardvark', 4)))
print('{:*^60}'.format('itertools.islice(, 4, 7)'))
print(list(itertools.islice('Aardvark', 4, 7)))
print('{:*^60}'.format('itertools.islice(, 1, 7, 2)'))
print(list(itertools.islice('Aardvark', 1, 7, 2)))
def removestart(self, file): """Remove the header of the file.""" return list(itertools.dropwhile(lambda l: l.startswith("#"), file))
def make_messages(locale=None, domain='django', verbosity='1', all=False, extensions=None): """ Uses the locale directory from the Django SVN tree or an application/ project to process all """ # Need to ensure that the i18n framework is enabled from django.conf import settings if settings.configured: settings.USE_I18N = True else: settings.configure(USE_I18N=True) from django.utils.translation import templatize if os.path.isdir(os.path.join('conf', 'locale')): localedir = os.path.abspath(os.path.join('conf', 'locale')) elif os.path.isdir('locale'): localedir = os.path.abspath('locale') else: raise CommandError( "This script should be run from the Django SVN tree or your project or app tree. If you did indeed run it from the SVN checkout or your project or application, maybe you are just missing the conf/locale (in the django tree) or locale (for project and application) directory? It is not created automatically, you have to create it by hand if you want to enable i18n for your project or application." ) if domain not in ('django', 'djangojs'): raise CommandError( "currently makemessages only supports domains 'django' and 'djangojs'" ) if (locale is None and not all) or domain is None: # backwards compatible error message if not sys.argv[0].endswith("make-messages.py"): message = "Type '%s help %s' for usage.\n" % (os.path.basename( sys.argv[0]), sys.argv[1]) else: message = "usage: make-messages.py -l <language>\n or: make-messages.py -a\n" raise CommandError(message) languages = [] if locale is not None: languages.append(locale) elif all: languages = [ el for el in os.listdir(localedir) if not el.startswith('.') ] for locale in languages: if verbosity > 0: print "processing language", locale basedir = os.path.join(localedir, locale, 'LC_MESSAGES') if not os.path.isdir(basedir): os.makedirs(basedir) pofile = os.path.join(basedir, '%s.po' % domain) potfile = os.path.join(basedir, '%s.pot' % domain) if os.path.exists(potfile): os.unlink(potfile) all_files = [] for (dirpath, dirnames, filenames) in os.walk("."): all_files.extend([(dirpath, f) for f in filenames]) all_files.sort() for dirpath, file in all_files: file_base, file_ext = os.path.splitext(file) if domain == 'djangojs' and file_ext == '.js': if verbosity > 1: sys.stdout.write('processing file %s in %s\n' % (file, dirpath)) src = open(os.path.join(dirpath, file), "rb").read() src = pythonize_re.sub('\n#', src) open(os.path.join(dirpath, '%s.py' % file), "wb").write(src) thefile = '%s.py' % file cmd = 'xgettext -d %s -L Perl --keyword=gettext_noop --keyword=gettext_lazy --keyword=ngettext_lazy:1,2 --from-code UTF-8 -o - "%s"' % ( domain, os.path.join(dirpath, thefile)) (stdin, stdout, stderr) = os.popen3(cmd, 't') msgs = stdout.read() errors = stderr.read() if errors: raise CommandError( "errors happened while running xgettext on %s\n%s" % (file, errors)) old = '#: ' + os.path.join(dirpath, thefile)[2:] new = '#: ' + os.path.join(dirpath, file)[2:] msgs = msgs.replace(old, new) if os.path.exists(potfile): # Strip the header msgs = '\n'.join(dropwhile(len, msgs.split('\n'))) else: msgs = msgs.replace('charset=CHARSET', 'charset=UTF-8') if msgs: open(potfile, 'ab').write(msgs) os.unlink(os.path.join(dirpath, thefile)) elif domain == 'django' and (file_ext == '.py' or file_ext in extensions): thefile = file if file_ext in extensions: src = open(os.path.join(dirpath, file), "rb").read() thefile = '%s.py' % file open(os.path.join(dirpath, thefile), "wb").write(templatize(src)) if verbosity > 1: sys.stdout.write('processing file %s in 
%s\n' % (file, dirpath)) cmd = 'xgettext -d %s -L Python --keyword=gettext_noop --keyword=gettext_lazy --keyword=ngettext_lazy:1,2 --keyword=ugettext_noop --keyword=ugettext_lazy --keyword=ungettext_lazy:1,2 --from-code UTF-8 -o - "%s"' % ( domain, os.path.join(dirpath, thefile)) (stdin, stdout, stderr) = os.popen3(cmd, 't') msgs = stdout.read() errors = stderr.read() if errors: raise CommandError( "errors happened while running xgettext on %s\n%s" % (file, errors)) if thefile != file: old = '#: ' + os.path.join(dirpath, thefile)[2:] new = '#: ' + os.path.join(dirpath, file)[2:] msgs = msgs.replace(old, new) if os.path.exists(potfile): # Strip the header msgs = '\n'.join(dropwhile(len, msgs.split('\n'))) else: msgs = msgs.replace('charset=CHARSET', 'charset=UTF-8') if msgs: open(potfile, 'ab').write(msgs) if thefile != file: os.unlink(os.path.join(dirpath, thefile)) if os.path.exists(potfile): (stdin, stdout, stderr) = os.popen3('msguniq --to-code=utf-8 "%s"' % potfile, 'b') msgs = stdout.read() errors = stderr.read() if errors: raise CommandError( "errors happened while running msguniq\n%s" % errors) open(potfile, 'w').write(msgs) if os.path.exists(pofile): (stdin, stdout, stderr) = os.popen3( 'msgmerge -q "%s" "%s"' % (pofile, potfile), 'b') msgs = stdout.read() errors = stderr.read() if errors: raise CommandError( "errors happened while running msgmerge\n%s" % errors) open(pofile, 'wb').write(msgs) os.unlink(potfile)
# ['foo', 'bar', 'ls', '/some/dir', 0, 1, 2, 3, 4] print (list(chain.from_iterable([cmd, numbers]))) # ['ls', '/some/dir', 0, 1, 2, 3, 4] from itertools import compress letters = 'ABCDEFG' bools = [True, False, True, True, False] print (list(compress(letters, bools))) # checks the first against the second, keeps the first if match is True # ['A', 'C', 'D'] from itertools import dropwhile print (list(dropwhile(lambda x: x < 5, [1, 4, 6, 4, 1]))) # drops elements as long as the filter criteria is True # [6, 4, 1] def great_than_five(x): return x > 5 # Once we hit a value that is less than 5, then ALL the values after and including that value will be kept print (list(dropwhile(great_than_five, [6, 7, 8, 9, 1, 2, 3, 10]))) # [1, 2, 3, 10] from itertools import filterfalse # filterfalse will only return those values that evaluated to False print (list(filterfalse(great_than_five, [6, 7, 8, 9, 1, 2, 3, 10]))) # [1, 2, 3]
url = 'http://download.geonames.org/export/dump/countryInfo.txt' fields = 'ISO', 'ISO3', 'ISOnumeric', 'fips', 'name', 'capital', 'area', 'population', 'continent', 'tld', 'currencyCode', 'currencyName', 'phone', 'postalCodeFormat', 'postalCodeRegex', 'languages', 'id', 'neighbours', 'equivalentFipsCode' split_to_set = lambda s: set(s.split(',')) types = { 'area': float, 'id': int, 'population': int, 'ISOnumeric': int, 'languages': split_to_set, 'neighbours': split_to_set } f = urllib.urlopen(url) Country = collections.namedtuple('Country', fields) source = itertools.dropwhile(lambda l: l.startswith('#'), f) reader = csv.DictReader(source, fields, delimiter='\t') print 'import collections' print 'Country = collections.namedtuple(\'Country\', {})'.format(fields) print 'countries = [' for line in reader: for field in fields: t = types.get(field, str) attr = line[field].strip() line[field] = t(attr) if attr else None print ' {},'.format(Country(**line)) print ']'
def go_data(ofile): """Skip header. the first next() call of the returned iterator will be the @data line""" return itertools.dropwhile(lambda x: not r_datameta.match(x), ofile)
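# Hypothetical usage sketch for go_data (assumes `import itertools`).
# `r_datameta` is not shown in the snippet above, so a plausible ARFF "@data"
# pattern is assumed here for illustration only.
import re
r_datameta = re.compile(r'^\s*@data\b', re.IGNORECASE)
arff_lines = ['@relation weather', '@attribute temp numeric', '@data', '1.0', '2.0']
data = go_data(arff_lines)
print(next(data))   # -> '@data'
print(list(data))   # -> ['1.0', '2.0']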
def main(): entries = read_data() view_names = set() interesting_attributes = Counter() for i in entries: view_names.add(i['view_name']) pathdir = i['path'].split('/')[1:] for j in pathdir: interesting_attributes[j] += 1 for key, count in dropwhile(lambda key_count: key_count[1] >= S['il'], interesting_attributes.most_common()): del interesting_attributes[key] view_names = list(view_names) i2n, n2i = dict(), dict() for i, n in enumerate(view_names): i2n[i] = n n2i[n] = i interesting_attributes = interesting_attributes.keys() i2a, a2i = dict(), dict() for i, a in enumerate(interesting_attributes): i2a[i] = a a2i[a] = i global_step= tf.Variable(0., False, dtype=tf.float32) x = tf.placeholder(tf.float32, [None, len(interesting_attributes)], name = 'input') y_ = tf.placeholder(tf.float32, [None, len(view_names)], name = 'label') W = tf.Variable(tf.random_normal([len(interesting_attributes), len(view_names)]), dtype = tf.float32) b = tf.Variable(tf.random_normal([len(view_names)], dtype = tf.float32)) linear = tf.matmul(x, W) + b pred = tf.argmax(tf.nn.softmax(linear), 1) actual = tf.argmax(y_, 1) accuracy = tf.reduce_mean(tf.cast(tf.equal(pred, actual), tf.float32)) loss = tf.losses.softmax_cross_entropy(y_, linear) learning_rate = tf.train.exponential_decay(S['lr'], global_step, 500, 0.5, staircase = True) optimizer = tf.train.AdamOptimizer(learning_rate) train = optimizer.minimize(loss, global_step) init = tf.global_variables_initializer() with tf.Session() as sess: tf.set_random_seed(0) sess.run(init) for i in range(S['ti']): feed_indices = np.random.randint(low = 0, high = len(entries), size = S['bs']) feed_input_arrays = [] feed_labels = np.array([[0.] * len(view_names) for _ in range(S['bs'])], dtype = np.float32) for ji, j in enumerate(feed_indices): ia = np.zeros(len(interesting_attributes), dtype = np.float32) feed_labels[ji][n2i[entries[j]['view_name']]] = 1. for k in entries[j]['path'].split('/')[1:]: if k in interesting_attributes: ia[a2i[k]] = 1. feed_input_arrays.append(ia) feed_input = np.stack(feed_input_arrays) l, a, _ = sess.run((loss, accuracy, train), feed_dict={x: feed_input, y_: feed_labels}) print('batch {:4}/{}: accuracy = {:.2f}%, loss = {}'.format(i+1, S['ti'], a*100, l)) test_indices = np.random.randint(low = 0, high = len(entries), size = S['tc']) test_input_arrays = [] test_labels = np.array([[0.] * len(view_names) for _ in range(S['tc'])], dtype = np.float32) for ii, i in enumerate(test_indices): ia = np.zeros(len(interesting_attributes), dtype = np.float32) test_labels[ii][n2i[entries[i]['view_name']]] = 1 for j in entries[i]['path'].split('/')[1:]: if j in interesting_attributes: ia[a2i[j]] = 1 test_input_arrays.append(ia) test_input = np.stack(test_input_arrays) test_begin = datetime.now() test_p, test_a, acc, los = sess.run((pred, actual, accuracy, loss), feed_dict={x: test_input, y_: test_labels}) test_end = datetime.now() test_elapse = test_end - test_begin print('{} tests completed in {} seconds\n Accuracy: {:.2f}%\n Loss: {}\n\n\n'.format(S['tc'], test_elapse.total_seconds(), acc*100, los)) for ti, (tp, ta) in enumerate(zip(test_p, test_a)): if tp != ta: print('Mismatch:\n Path: {}\n Should obtain {},\n got {}'.format(entries[test_indices[ti]]['path'], i2n[ta], i2n[tp]))
def strip_head(items): return itertools.dropwhile(lambda i: i is self.SEPARATOR, items)
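strip_head above closes over self.SEPARATOR from its enclosing scope. A standalone sketch of the same identity-based skip, with a module-level sentinel standing in for that attribute:

import itertools

SEPARATOR = object()  # stand-in for self.SEPARATOR

def strip_head(items):
    # Drop leading separator sentinels; comparison is by identity, not equality.
    return itertools.dropwhile(lambda i: i is SEPARATOR, items)

items = [SEPARATOR, SEPARATOR, "a", SEPARATOR, "b"]
# Only the leading run of separators is removed; the interior one stays.
print(list(strip_head(items)))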
def __iter__(self): self.iterobj = itertools.dropwhile(lambda x: x.startswith('#'), self.iterInfo) return self
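A rough, self-contained sketch of the wrapper pattern this __iter__ belongs to; the class is hypothetical, but it shows how dropwhile lets iteration start past leading '#' comment lines.

import itertools

class InfoReader:
    """Hypothetical wrapper that iterates over a text block, skipping leading comments."""

    def __init__(self, text):
        self.iterInfo = iter(text.splitlines())

    def __iter__(self):
        self.iterobj = itertools.dropwhile(lambda x: x.startswith('#'), self.iterInfo)
        return self

    def __next__(self):
        return next(self.iterobj)

reader = InfoReader("# header\n# more header\nvalue 1\nvalue 2")
print(list(reader))  # ['value 1', 'value 2']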
from itertools import dropwhile, islice lines = [ "# comment1", "# comment2", "# comment3", "data 1", "data 2", "data 3" ] for line in dropwhile(lambda l: l.startswith("#"), lines): print(line) print() # if the number of items to skip is known in advance, islice can be used instead for x in islice(lines, 3, None): # None as the stop value means everything after the first 3 items, like lines[3:] print(x)
from itertools import dropwhile def should_drop(x): print('Testing:', x) return x < 1 for i in dropwhile(should_drop, [-1, 0, 1, 2, -2]): print('Yielding:', i)
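For reference, the output this produces: the predicate is only called until it first returns False, so the trailing -2 is still yielded even though it is less than 1.

Testing: -1
Testing: 0
Testing: 1
Yielding: 1
Yielding: 2
Yielding: -2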
def outgoing_connections(self): """Returns a list of all outgoing connections for this peer.""" # Outgoing connections are on the right return list( dropwhile(lambda c: c.direction != OUTGOING, self.connections))
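This relies on the connection list being ordered with all incoming connections before the outgoing ones. A tiny sketch of that assumption, using stand-in constants and a namedtuple in place of the real connection objects:

from collections import namedtuple
from itertools import dropwhile

INCOMING, OUTGOING = "in", "out"          # stand-ins for the real direction constants
Conn = namedtuple("Conn", "name direction")

connections = [Conn("a", INCOMING), Conn("b", INCOMING),
               Conn("c", OUTGOING), Conn("d", OUTGOING)]

# Works only because incoming connections come first; an outgoing connection
# appearing earlier would also keep any later incoming ones.
outgoing = list(dropwhile(lambda c: c.direction != OUTGOING, connections))
print([c.name for c in outgoing])  # ['c', 'd']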
def get_after(sentinel, iterable): "Get the value after `sentinel` in an `iterable`" truncated = dropwhile(lambda el: el != sentinel, iterable) next(truncated) return next(truncated)
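Example usage of get_after (repeated here with annotated comments so the block is runnable on its own). Note it raises StopIteration if the sentinel is absent or is the last element, since dropwhile then has nothing, or nothing further, to yield.

from itertools import dropwhile

def get_after(sentinel, iterable):
    "Get the value after `sentinel` in an `iterable`"
    truncated = dropwhile(lambda el: el != sentinel, iterable)
    next(truncated)          # consume the sentinel itself
    return next(truncated)   # the element right after it

print(get_after("--", ["a", "b", "--", "c", "d"]))  # c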
def dropwhile(self, predicate: Predicate) -> 'Stream[T]': return self.next(lambda xs: itertools.dropwhile(predicate, xs))
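A minimal, hypothetical sketch of the fluent wrapper this method belongs to (the real Stream class is not shown here): next() wraps the underlying iterable in another lazy transformation, so dropwhile composes without materialising anything.

import itertools

class Stream:
    """Toy stand-in for the real Stream[T] wrapper."""

    def __init__(self, xs):
        self._xs = xs

    def next(self, f):
        # Each transformation wraps the iterable lazily and returns a new Stream.
        return Stream(f(self._xs))

    def dropwhile(self, predicate):
        return self.next(lambda xs: itertools.dropwhile(predicate, xs))

    def to_list(self):
        return list(self._xs)

print(Stream([0, 0, 3, 0, 5]).dropwhile(lambda x: x == 0).to_list())  # [3, 0, 5]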
def __parseLine(self, line): if line[0:1] == "#": # Debug line which we shall ignore as specified in CECPv2 specs return # log.debug("__parseLine: line=\"%s\"" % line.strip(), extra={"task":self.defname}) parts = whitespaces.split(line.strip()) if parts[0] == "pong": self.lastpong = int(parts[1]) return # Illegal Move if parts[0].lower().find("illegal") >= 0: log.warning("__parseLine: illegal move: line=\"%s\", board=%s" \ % (line.strip(), self.board), extra={"task":self.defname}) if parts[-2] == "sd" and parts[-1].isdigit(): print("depth", parts[-1], file=self.engine) return # A Move (Perhaps) if self.board: if parts[0] == "move": movestr = parts[1] # Old Variation elif d_plus_dot_expr.match(parts[0]) and parts[1] == "...": movestr = parts[2] else: movestr = False if movestr: log.debug("__parseLine: acquiring self.boardLock", extra={"task": self.defname}) self.waitingForMove = False self.readyForMoveNowCommand = False self.boardLock.acquire() try: if self.engineIsInNotPlaying: # If engine was set in pause just before the engine sent its # move, we ignore it. However the engine has to know that we # ignored it, and thus we step it one back log.info("__parseLine: Discarding engine's move: %s" % movestr, extra={"task": self.defname}) print("undo", file=self.engine) return else: try: move = parseAny(self.board, movestr) except ParsingError as e: self.end( WHITEWON if self.board.color == BLACK else BLACKWON, WON_ADJUDICATION) return if validate(self.board, move): self.board = None self.returnQueue.put(move) return self.end( WHITEWON if self.board.color == BLACK else BLACKWON, WON_ADJUDICATION) return finally: log.debug("__parseLine(): releasing self.boardLock", extra={"task": self.defname}) self.boardLock.release() self.movecon.acquire() self.movecon.notifyAll() self.movecon.release() # Analyzing if self.engineIsInNotPlaying: if parts[:4] == ["0", "0", "0", "0"]: # Crafty doesn't analyze until it is out of book print("book off", file=self.engine) return match = anare.match(line) if match: depth, score, moves = match.groups() if "mat" in score.lower() or "#" in moves: # Will look either like -Mat 3 or Mat3 scoreval = MATE_VALUE if score.startswith('-'): scoreval = -scoreval else: scoreval = int(score) mvstrs = movere.findall(moves) try: moves = listToMoves(self.board, mvstrs, type=None, validate=True, ignoreErrors=False) except: # Errors may happen when parsing "old" lines from # analyzing engines, which haven't yet noticed their new tasks log.debug('Ignored an "old" line from analyzer: %s %s' % (self.board, mvstrs), extra={"task": self.defname}) return # Don't emit if we weren't able to parse moves, or if we have a move # to kill the opponent king - as it confuses many engines if moves and not self.board.board.opIsChecked(): self.emit("analyze", [(moves, scoreval, depth.strip())]) return # Offers draw if parts[0:2] == ["offer", "draw"]: self.emit("accept", Offer(DRAW_OFFER)) return # Resigns if parts[0] == "resign" or \ (parts[0] == "tellics" and parts[1] == "resign"): # buggy crafty # Previously: if "resign" in parts, # however, this is too generic, since "hint", "bk", # "feature option=.." and possibly other, future CECPv2 # commands can validly contain the word "resign" without this # being an intentional resign offer. 
self.emit("offer", Offer(RESIGNATION)) return #if parts[0].lower() == "error": # return #Tell User Error if parts[0] == "tellusererror": # We don't want to see our stop analyzer hack as an error message if "8/8/8/8/8/8/8/8" in "".join(parts[1:]): return # Create a non-modal non-blocking message dialog with the error: dlg = Gtk.MessageDialog(parent=None, flags=0, type=Gtk.MessageType.WARNING, buttons=Gtk.ButtonsType.CLOSE, message_format=None) # Use the engine name if already known, otherwise the defname: displayname = self.name if not displayname: displayname = self.defname # Compose the dialog text: dlg.set_markup( GObject.markup_escape_text( _("The engine %s reports an error:") % displayname) + "\n\n" + GObject.markup_escape_text(" ".join(parts[1:]))) # handle response signal so the "Close" button works: dlg.connect("response", lambda dlg, x: dlg.destroy()) dlg.show_all() return # Tell Somebody if parts[0][:4] == "tell" and \ parts[0][4:] in ("others", "all", "ics", "icsnoalias"): log.info("Ignoring tell %s: %s" % (parts[0][4:], " ".join(parts[1:]))) return if "feature" in parts: # Some engines send features after done=1, so we will iterate after done=1 too done1 = False # We skip parts before 'feature', as some engines give us lines like # White (1) : feature setboard=1 analyze...e="GNU Chess 5.07" done=1 parts = parts[parts.index("feature"):] for i, pair in enumerate(parts[1:]): # As "parts" is split with no thoughs on quotes or double quotes # we need to do some extra handling. if pair.find("=") < 0: continue key, value = pair.split("=", 1) if not key in self.features: continue if value.startswith('"') and value.endswith('"'): value = value[1:-1] # If our pair was unfinished, like myname="GNU, we search the # rest of the pairs for a quotating mark. elif value[0] == '"': rest = value[1:] + " " + " ".join(parts[2 + i:]) j = rest.find('"') if j == -1: log.warning("Missing endquotation in %s feature", extra={"task": self.defname}) value = rest else: value = rest[:j] elif value.isdigit(): value = int(value) if key in self.supported_features: print("accepted %s" % key, file=self.engine) else: print("rejected %s" % key, file=self.engine) if key == "done": if value == 1: done1 = True continue elif value == 0: log.info("Adds %d seconds timeout" % TIME_OUT_SECOND, extra={"task": self.defname}) # This'll buy you some more time self.timeout = time.time() + TIME_OUT_SECOND self.returnQueue.put("not ready") return if key == "smp" and value == 1: self.options["cores"] = { "name": "cores", "type": "spin", "default": 1, "min": 1, "max": 64 } elif key == "memory" and value == 1: self.options["memory"] = { "name": "memory", "type": "spin", "default": 32, "min": 1, "max": 4096 } elif key == "option" and key != "done": option = self.__parse_option(value) self.options[option["name"]] = option else: self.features[key] = value if key == "myname" and not self.name: self.setName(value) if done1: # Start a new game before using the engine: # (CECPv2 engines) print("new", file=self.engine) # We are now ready for play: self.emit("readyForOptions") self.emit("readyForMoves") self.returnQueue.put("ready") # A hack to get better names in protover 1. # Unfortunately it wont work for now, as we don't read any lines from # protover 1 engines. When should we stop? if self.protover == 1: if self.defname[0] in ''.join(parts): basis = self.defname[0] name = ' '.join( itertools.dropwhile(lambda part: basis not in part, parts)) self.features['myname'] = name if not self.name: self.setName(name)
def extract_plot(self, response): plot_css = 'article p ::text' plot = clean(response.css(plot_css).extract()) plot = dropwhile(lambda rd: 'release:' not in rd.lower(), plot) plot = [p for p in plot if 'release:' not in p.lower()] return plot or clean(response.css(plot_css).extract())
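A compact illustration of the pattern used above: dropwhile discards paragraphs up to the 'release:' marker, and the follow-up list comprehension removes the marker lines themselves. The sample text is invented.

from itertools import dropwhile

paragraphs = ["Starring: someone", "Release: 2019-05-01", "A quiet thriller.", "More plot."]

plot = dropwhile(lambda rd: "release:" not in rd.lower(), paragraphs)
plot = [p for p in plot if "release:" not in p.lower()]
print(plot)  # ['A quiet thriller.', 'More plot.']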
def _rerun_as_results(dset, revrange, since, branch, onto, message): """Represent the rerun as result records. In the standard case, the information in these results will be used to actually re-execute the commands. """ try: results = _revrange_as_results(dset, revrange) except ValueError as exc: yield get_status_dict("run", status="error", message=exc_str(exc)) return ds_repo = dset.repo # Drop any leading commits that don't have a run command. These would be # skipped anyways. results = list(dropwhile(lambda r: "run_info" not in r, results)) if not results: yield get_status_dict("run", status="impossible", ds=dset, message=("No run commits found in range %s", revrange)) return if onto is not None and onto.strip() == "": onto = results[0]["commit"] + "^" if onto and not ds_repo.commit_exists(onto): yield get_status_dict( "run", ds=dset, status="error", message=("Revision specified for --onto (%s) does not exist.", onto)) return start_point = onto or "HEAD" if branch or onto: yield get_status_dict( "run", ds=dset, # Resolve this to the full hexsha so downstream code gets a # predictable form. commit=ds_repo.get_hexsha(start_point), branch=branch, rerun_action="checkout", status="ok") def skip_or_pick(hexsha, result, msg): result["rerun_action"] = "skip-or-pick" shortrev = ds_repo.get_hexsha(hexsha, short=True) result["message"] = ("%s %s; %s", shortrev, msg, "skipping or cherry picking") for res in results: hexsha = res["commit"] if "run_info" in res: rerun_dsid = res["run_info"].get("dsid") if rerun_dsid is not None and rerun_dsid != dset.id: skip_or_pick(hexsha, res, "was ran from a different dataset") res["status"] = "impossible" else: res["rerun_action"] = "run" res["diff"] = diff_revision(dset, hexsha) # This is the overriding message, if any, passed to this rerun. res["rerun_message"] = message else: if len(res["parents"]) > 1: res["rerun_action"] = "merge" else: skip_or_pick(hexsha, res, "does not have a command") yield res
def remove_color(nick): # nick[0] is the color control character (e.g. "\x03"); 1 or 2 digits follow it return "".join(dropwhile(lambda x: x.isdigit(), nick[1:]))
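Example usage, assuming an IRC-style color prefix (control character followed by up to two digits); the function is repeated here so the block runs on its own.

from itertools import dropwhile

def remove_color(nick):
    # nick[0] is assumed to be the color control character (e.g. "\x03");
    # 1 or 2 digits follow it before the actual nickname.
    return "".join(dropwhile(lambda x: x.isdigit(), nick[1:]))

print(remove_color("\x0304alice"))  # alice
print(remove_color("\x035bob"))     # bob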
def _parse(self, ds, name=None): """ Parse the description, examples, and tags from a docstring. """ lines = ds.split(os.linesep) def seek_past_head(ls): h = [] for i, l in enumerate(ls): if self._is_blank(l) or self._is_tag_start(l): return h, i h.append(l) else: return h, len(ls) head, non_head_index = seek_past_head(lines) #if not head: # raise LucidocError("Empty docstring") head = " ".join(l.strip() for l in head) ls1, ls2 = tee(lines[non_head_index:]) detail_lines = list( filterfalse(self._is_blank, takewhile(lambda l: not self._past_desc(l), ls1))) desc = head if detail_lines: desc += (("\n\n" if desc else "") + "\n".join(detail_lines)) post_desc = list(dropwhile(lambda l: not self._past_desc(l), ls2)) raw_tag_blocks = [] if post_desc and self._is_tag_start(post_desc[0]): curr_block = [] for i, l in enumerate(post_desc): if self._is_blank(l): first_non_tag_index = i + 1 break l = l.strip() if self._is_tag_start(l): if curr_block: raw_tag_blocks.append(curr_block) curr_block = [l] else: curr_block.append(l) else: first_non_tag_index = None curr_block and raw_tag_blocks.append(curr_block) else: first_non_tag_index = 0 examples = self._parse_example_lines( [] if first_non_tag_index is None else post_desc[first_non_tag_index:]) tags = [self._get_tag(chunk) for chunk in raw_tag_blocks] par, ret, err = [], [], [] for t in tags: if isinstance(t, ParTag): par.append(t) elif isinstance(t, RetTag): ret.append(t) elif isinstance(t, ErrTag): err.append(t) else: raise TypeError("Unrecognized doc tag type: {}".format( type(t))) if len(ret) > 1: raise LucidocError("Multiple ({}) returns tags: {}".format( len(ret), ret)) ret = ret[0] if ret else None self._last_seen = ParsedDocstringResult(ds, desc, par, ret, err, examples) return getattr(self._last_seen, name) if name else self._last_seen
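The tee/takewhile/dropwhile trio above splits the remaining docstring lines into the detail text and the tag section in a single pass over two independent copies of the iterator. A reduced sketch of that split, with a simplified "past the description" test standing in for self._past_desc and self._is_blank:

from itertools import dropwhile, filterfalse, takewhile, tee

lines = ["Longer description line.", "", ":param x: a value", ":return: something"]

def past_desc(line):
    # Simplified stand-in for self._past_desc: tag lines start with ':'.
    return line.lstrip().startswith(":")

ls1, ls2 = tee(lines)
# Description: everything before the first tag line, with blank lines removed.
detail = list(filterfalse(lambda l: not l.strip(), takewhile(lambda l: not past_desc(l), ls1)))
# Tag section: everything from the first tag line onward.
tags = list(dropwhile(lambda l: not past_desc(l), ls2))

print(detail)  # ['Longer description line.']
print(tags)    # [':param x: a value', ':return: something']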