def __init__(
        self,
        id,
        dataset_dir,
        output_dir,
        n_splits,
        base_train_config,
        folds,
):
    params = locals()
    torch.manual_seed(0)
    ids = pipe(
        range(n_splits),
        filter(lambda x: x in folds),
        list
    )
    train_df_path = delayed(load_train_df)(
        dataset_dir=join(dataset_dir, 'train'),
        output=join(output_dir, 'train.pqt')
    )
    train_df = delayed(pd.read_parquet)(train_df_path)
    kfolded = delayed(kfold)(train_df, n_splits)
    train_sets = pipe(
        ids,
        map(lambda x: delayed(lambda i: i[x])(kfolded)),
        list
    )
    model_paths = pipe(
        zip(ids, train_sets),
        map(lambda x: delayed(train_fusion)(
            **base_train_config,
            model_path=join(output_dir, f"{id}-fold-{x[0]}-base-model.pt"),
            sets=x[1],
            log_dir=f'{config["TENSORBORAD_LOG_DIR"]}/{id}/{x[0]}/base',
        )),
        list
    )
    test_df_path = load_test_df(
        dataset_dir='/store/tellus/test',
        output=join(output_dir, 'test.pqt')
    )
    test_df = delayed(pd.read_parquet)(test_df_path)
    test_dataset = delayed(TellusDataset)(
        test_df,
        has_y=False,
    )
    submission_df_path = delayed(predict)(
        model_paths=model_paths,
        log_dir=f'{config["TENSORBORAD_LOG_DIR"]}/{id}/sub',
        dataset=test_dataset,
        log_interval=10,
        out_path=f'{output_dir}/{id}_submission.tsv',
    )
    self.output = delayed(lambda x: x)((
        model_paths,
        submission_df_path,
    ))
def delete_cascade(self, id):
    import mlboard_api.query as qs
    pipe(
        self.get_children(id),
        map(lambda x: (
            getattr(qs, x.__class__.__name__)(
                session=self.session
            ).delete_cascade(x.id)
        )),
        list
    )
    self.filter(self.entitiy_class.id == id).delete()
    return id
def predict(
        model_dirs,
        dataset,
        out_path,
        batch_size=512,
):
    device = torch.device("cuda")
    models = pipe(
        model_dirs,
        map(lambda x: os.path.join(x, '*.pt')),
        map(glob.glob),
        concat,
        map(torch.load),
        map(lambda x: x.eval().to(device)),
        list
    )
    loader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=False,
        pin_memory=True,
    )
    rows = []
    y_preds = []
    y_ids = []
    with torch.no_grad():
        for sample in loader:
            ids = sample['id']
            palser_x = sample['palsar'].to(device)
            normal_outputs = pipe(
                models,
                map(lambda x: x(palser_x)[0]),
                list,
            )
            output = pipe(
                [*normal_outputs],
                map(lambda x: x.softmax(dim=1)),
                reduce(lambda x, y: (x + y) / 2),
                lambda x: x.argmax(dim=1),
            )
            y_ids += ids
            y_preds += output.cpu().detach().tolist()
    rows = pipe(
        zip(y_ids, y_preds),
        map(lambda x: {
            'id': x[0],
            'lable': x[1]
        }),
        list
    )
    df = pd.DataFrame(rows)
    df.to_csv(out_path, sep='\t', header=False, index=False)
    return out_path
def __init__(self,
             feature_size=8,
             depth=3,
             ):
    super().__init__()
    self.down_layers = nn.ModuleList([
        DownSample(1, feature_size * 2 ** depth),
        *pipe(
            range(depth),
            reversed,
            map(lambda x: DownSample(
                feature_size * (2 ** (x + 1)),
                feature_size * (2 ** x),
            )),
            list,
        )
    ])
    self.center = DownSample(
        in_ch=feature_size,
        out_ch=feature_size,
    )
    self.up_layers = nn.ModuleList([
        *pipe(
            self.down_layers,
            reversed,
            map(lambda x: x.out_ch),
            take(depth),
            map(lambda x: UpSample(
                feature_size,
                feature_size,
                x,
            )),
            list,
        ),
        UpSample(
            feature_size,
            feature_size,
            feature_size * 2 ** depth,
        ),
    ])
    self._output = nn.Conv2d(
        feature_size,
        2,
        kernel_size=3
    )
def forward(self, x, others, size):
    out = pipe(
        [x, *others],
        map(lambda x: F.interpolate(x, mode='bilinear', size=size)),
        list
    )
    out = torch.cat([*out], 1)
    out = self.block(out)
    return out
def validate(predicts, dataset, batch_size):
    loader = DataLoader(
        dataset,
        batch_size=batch_size,
        pin_memory=True,
        shuffle=False,
    )
    y_preds = np.array(predicts).mean(axis=0).argmax(axis=1)
    y_trues = pipe(
        loader,
        map(lambda x: x['label'].cpu().detach().tolist()),
        reduce(lambda x, y: x + y),
        np.array,
    )
    score = iou(
        y_preds,
        y_trues,
    )
    tn, fp, fn, tp = confusion_matrix(y_trues, y_preds).ravel()
    return {
        'TPR': tp / (tp + fn),
        'FNR': fn / (tp + fn),
        'FPR': fp / (fp + tn),
        'acc': (tp + tn) / (tp + tn + fp + fn),
        'pre': tp / (tp + fp),
        'iou': tp / (fn + tp + fp),
    }
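# Worked example of the metric definitions returned by validate, on toy labels
# (illustrative values only, computed directly with numpy/sklearn as above):
import numpy as np
from sklearn.metrics import confusion_matrix

y_trues = np.array([1, 1, 1, 0, 0])
y_preds = np.array([1, 0, 1, 0, 1])
tn, fp, fn, tp = confusion_matrix(y_trues, y_preds).ravel()  # 1, 1, 1, 2
assert (tp / (tp + fn), fp / (fp + tn)) == (2 / 3, 1 / 2)    # TPR, FPR
assert tp / (fn + tp + fp) == 0.5                            # IoU of the positive class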
def __call__(self, epoch):
    cyclic = 1.0
    phase = epoch % self.period
    turn_phase, ratio = self.turning_point
    turn_cyclic = self.min_factor + self.range * ratio
    if phase <= turn_phase:
        cyclic = (
            self.min_factor
            + (turn_cyclic - self.min_factor) * phase/turn_phase
        )
    else:
        cyclic = turn_cyclic + \
            (self.max_factor - turn_cyclic) * \
            (phase - turn_phase)/(self.period - turn_phase)
    gamma = pipe(
        self.milestones,
        filter(lambda x: x[0] <= epoch),
        map(lambda x: x[1]),
        last
    )
    return cyclic * gamma
def get_train_row(base_path, label_dir, label):
    rows = pipe(
        [
            ("PALSAR", "before"),
            ("PALSAR", "after"),
            ("LANDSAT", "before"),
            ("LANDSAT", "after"),
        ],
        map(lambda x: (base_path, *x, label_dir, "*.tif")),
        map(lambda x: os.path.join(*x)),
        map(glob.glob),
        list,
        lambda x: zip(*x),
        map(lambda x: list(map(Path)(x))),
        map(lambda x: {
            "id": x[0].name,
            "label": label,
            "palsar_before": str(x[0]),
            "palsar_after": str(x[1]),
            "landsat_before": str(x[2]),
            "landsat_after": str(x[3]),
        }),
        list
    )
    return rows
def take_topk(scores, paths, top_num):
    return pipe(
        zip(scores, paths),
        lambda x: topk(top_num, x, key=lambda y: y[0]),
        map(lambda x: x[1]),
        list
    )
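# Minimal usage sketch for take_topk (hypothetical scores and paths): keep the
# paths whose scores rank in the top-k, ordered by descending score.
scores = [0.2, 0.9, 0.5]
paths = ['a.pt', 'b.pt', 'c.pt']
assert take_topk(scores, paths, 2) == ['b.pt', 'c.pt']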
def to_dict(self, convert_values: bool = False) -> MutableMapping[str, Any]:
    to_fields = curried.pipe(
        fields(self.__class__),
        curried.map(lambda a: (a, curried.get_in([to_key], a.metadata))),
        curried.filter(lambda f: f[1]),
        list,
    )
    if convert_values:
        d = asdict(self)
    else:
        d = {
            a.name: getattr(self, a.name)
            for a in fields(self.__class__)
        }
    if not to_fields:
        return d
    return curried.reduce(
        lambda acc, f: curried.update_in(acc, f[1], lambda _: d[f[0].name]),
        to_fields,
        {},
    )
def get_hashtag_string(given_item):
    """Return a string of hashtags associated with the given item"""
    return tz.pipe(
        tz.get_in(['entities', 'hashtags'], given_item, default=[]),
        tz.map(lambda x: tz.get_in(['text'], x, default=None)),
        tz.filter(lambda x: x is not None),
        lambda x: ", ".join(x))
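# Usage sketch with a toy tweet-shaped dict (hypothetical content):
tweet = {'entities': {'hashtags': [{'text': 'python'}, {'text': 'toolz'}]}}
assert get_hashtag_string(tweet) == "python, toolz"
assert get_hashtag_string({}) == ""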
def find_domain_urls(self, domain: str) -> List[str]:
    """
    Get all known urls for domain.

    Returns
    -------
    all_urls : list of str
    """
    def _urlkey_to_url(urlkey):
        try:
            # very rare bugged urlkeys appear
            domain, path = urlkey.split(')/', 1)
        except ValueError:
            return
        domain = domain.split(',')
        domain.reverse()
        domain = '.'.join(domain)
        if path:
            return '/'.join([domain, path])
        return domain

    urls_by_index = map(
        lambda ind: self.__get_domain_urls_in_index(ind, domain),
        self.indexes)
    all_urls = pipe(urls_by_index,
                    concat,
                    map(bytes.decode),
                    map(_urlkey_to_url),
                    filter(None),
                    map(unquote),
                    map(lambda x: x.strip()),
                    unique,
                    list)
    return all_urls
def sum_path():
    n = 1
    pos = (0, 0)
    side_length = 1
    sum_dict = {(0, 0): 1}
    step_fns = it.cycle([
        lambda x: (x[0] + 1, x[1]),
        lambda x: (x[0], x[1] + 1),
        lambda x: (x[0] - 1, x[1]),
        lambda x: (x[0], x[1] - 1)
    ])
    step_fn = next(step_fns)
    rotation_break_seq = set()
    while True:
        if is_odd_square(n - 1):
            step_fn = next(step_fns)
            side_length += 2
            delta_seq = [
                side_length - 2,
                side_length - 2 + 1,
                side_length - 2 + 1
            ]
            rotation_break_seq = cc.pipe(it.accumulate([n] + delta_seq),
                                         cc.drop(1),
                                         set)
        elif n in rotation_break_seq:
            step_fn = next(step_fns)
        sum_dict[pos] = neighbors_sum(pos, sum_dict)
        yield (pos, sum_dict[pos])
        pos = step_fn(pos)
        n += 1
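# Usage sketch (hypothetical target value; relies on is_odd_square and
# neighbors_sum defined alongside sum_path): the generator yields each spiral
# position together with its neighbour sum, so the first sum above a target is
first_larger = next(value for _, value in sum_path() if value > 747)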
def serde_with_class(cls):
    from_fields = list(
        map(lambda a: (a, get_in([from_key], a.metadata, [a.name])),
            fields(cls)))
    to_fields = pipe(
        fields(cls),
        map(lambda a: (a, get_in([to_key], a.metadata))),
        filter(lambda f: f[1]),
        list,
    )

    def from_dict(d):
        return cls(**dict(
            map(
                lambda f: (f[0].name, get_in(f[1], d, f[0].default)),
                from_fields,
            )))

    def to_dict(self):
        d = asdict(self)
        return reduce(
            lambda acc, f: update_in(acc, f[1], lambda _: d[f[0].name]),
            to_fields,
            {},
        )

    cls.from_dict = staticmethod(from_dict)
    cls.to_dict = to_dict
    return cls
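# Usage sketch for serde_with_class (hypothetical class; assumes fields/asdict
# above come from the standard dataclasses module and that from_key/to_key are
# the module's metadata key constants -- swap in attr.s/attr.ib instead if the
# surrounding module is attrs-based):
from dataclasses import dataclass, field


@serde_with_class
@dataclass
class Contact:
    name: str = None
    email: str = field(default=None,
                       metadata={from_key: ['detail', 'email'],
                                 to_key: ['detail', 'email']})


c = Contact.from_dict({'name': 'ada', 'detail': {'email': 'ada@example.com'}})
assert c.email == 'ada@example.com'
# only fields that carry a to_key path are emitted by to_dict()
assert c.to_dict() == {'detail': {'email': 'ada@example.com'}}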
def _get_wf_call_failures(metadata, opts):
    calls = []
    if 'calls' in opts:
        calls = opts['calls'].split(',')
    else:
        calls = metadata['calls'].keys()

    jobids = None
    if 'jobids' in opts:
        jobids = set(opts['jobids'].split(','))

    fails = {}
    for c in calls:
        tasks = metadata['calls'][c]
        failures = pipe(
            tasks,
            filter(lambda x: get('executionStatus', x) == 'Failed'),
            filter(lambda x: _valid_job_id(jobids, get('jobId', x))),
            map(
                lambda x: {
                    'jobId': get('jobId', x),
                    # 'inputs' : get('inputs', x),
                    'stderr': get('stderr', x),
                    'shard': get('shardIndex', x),
                    'err_msg': get_in(['failures', 0, 'message'], x, 'NA'),
                    # 'jes' : get('jes', x),
                    # 'runtime' : get('runtimeAttributes', x),
                    'rc': get('returnCode', x, 'NA'),
                }),
            list)
        fails[c] = failures
    return fails
def is_not_reg(rle_mask):
    if isinstance(rle_mask, str):
        return pipe(rle_mask.split(' '),
                    len,
                    lambda x: x > 6)
    else:
        return True
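# Illustrative RLE strings (hypothetical values): is_not_reg returns True for
# non-string masks (e.g. NaN) and for masks encoded with more than six
# run-length tokens.
assert is_not_reg('1 5 10 5 20 5') is False       # 6 tokens
assert is_not_reg('1 5 10 5 20 5 30 5') is True   # 8 tokens
assert is_not_reg(float('nan')) is True           # non-string mask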
def get_summary(annotations: Annotations, labels: Labels) -> t.Any:
    count = len(annotations)
    label_count = pipe(annotations,
                       map(lambda x: len(x["label_ids"])),
                       list,
                       np.array)
    label_hist = {
        5: np.sum(label_count == 5),
        4: np.sum(label_count == 4),
        3: np.sum(label_count == 3),
    }
    label_ids = pipe(
        annotations,
        mapcat(lambda x: x["label_ids"]),
        list,
        np.array,
    )
    total_label_count = len(label_ids)
    top = pipe(
        frequencies(label_ids).items(),
        topk(5, key=lambda x: x[1]),
        map(lambda x: (
            f"{labels[x[0]].category}::{labels[x[0]].detail}",
            x[1],
        )),
        list,
    )
    worst = pipe(
        frequencies(label_ids).items(),
        topk(5, key=lambda x: -x[1]),
        map(lambda x: (
            f"{labels[x[0]].category}::{labels[x[0]].detail}",
            x[1],
        )),
        list,
    )
    return {
        "count": count,
        "label_hist": label_hist,
        "label_count_mean": label_count.mean(),
        "label_count_median": np.median(label_count),
        "label_count_max": label_count.max(),
        "label_count_min": label_count.min(),
        "total_label_count": total_label_count,
        "top": top,
        "worst": worst,
    }
def get_categories(given_dict):
    """Return a string of the categories associated with a post"""
    return tz.pipe(
        tz.get_in(['object', 'tags'], given_dict, default=[]),
        tz.filter(lambda x: tz.get_in(['objectType'], x, default=None) == 'category'),
        tz.map(lambda x: tz.get_in(['displayName'], x, default=None)),
        lambda x: ", ".join(x)
    )
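# Usage sketch with a toy WordPress-style activity dict (hypothetical content):
post = {'object': {'tags': [
    {'objectType': 'category', 'displayName': 'News'},
    {'objectType': 'hashtag', 'displayName': 'ignored'},
    {'objectType': 'category', 'displayName': 'Tech'},
]}}
assert get_categories(post) == "News, Tech"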
def add_consistency_noise(batch_images, ):
    filped = batch_images.flip([3])
    return pipe(
        batch_images,
        map(ramdom_erase),
        list,
        torch.stack
    )
def batch_aug(aug, batch, ch=3):
    return pipe(
        batch,
        map(lambda x: [aug(x[0:ch, :, :]), aug(x[ch:2*ch, :, :])]),
        map(lambda x: torch.cat(x, dim=0)),
        list,
        torch.stack
    )
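# Minimal usage sketch for batch_aug (hypothetical shapes): the same
# augmentation is applied to the first and the second `ch`-channel half of
# each sample (e.g. a before/after pair) before the batch is re-stacked.
import torch


def hflip(t):
    return t.flip(-1)


batch = [torch.randn(6, 40, 40) for _ in range(4)]
out = batch_aug(hflip, batch, ch=3)
assert out.shape == (4, 6, 40, 40)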
def get_segment_indices(dataset, filter_indcies):
    df = dataset.df
    filtered = df.iloc[filter_indcies]
    return pipe(
        filtered[filtered['is_empty'] == False].index,
        map(df.index.get_loc),
        list
    )
def bulk_insert(self, objects):
    if len(objects) > 0:
        sql = self.entitiy_class.__table__.insert()\
            .values(pipe(objects,
                         map(lambda x: x if isinstance(x, dict) else x.to_dict()),
                         list))
        self.session.execute(sql)
        self.session.commit()
def forward(self, palser_x, landsat_x):
    palser_x = self.pad(palser_x)
    x = pipe(
        [landsat_x, palser_x],
        map(lambda x: F.interpolate(
            x,
            mode='bilinear',
            size=(self.resize, self.resize)
        )),
        list,
        lambda x: torch.cat(x, dim=1)
    )
    x = self.fusion_enc(x)
    x = self.logit_out(x).view(-1, 2)
    return x
def save_first(stream_key, stream_iterator):
    """Save the first entry in the stream as an example"""

    def parse_entry(given_entry):
        """parse either the WordPress stream strings, or the semi-parsed twitter stream"""
        if isinstance(given_entry, str):
            return json.loads(given_entry)
        else:
            return dict(given_entry)

    file_name = '../data/samples/example_{}.json'.format(stream_key)
    with open(file_name, 'w') as outfile:
        tz.pipe(
            next(stream_iterator),  # first entry
            parse_entry,            # parse
            json.dumps,             # unparse
            outfile.write)          # save
    print("Saved {}".format(file_name))
    return True
def __init__(self, in_ch, feature_size=64, depth=3, ratio=2):
    super().__init__()
    self.down_layers = nn.ModuleList(
        [
            DownSample(
                in_ch=in_ch,
                out_ch=feature_size,
            ),
            *pipe(
                range(depth),
                map(lambda d: DownSample(
                    in_ch=int(feature_size*ratio**(d)),
                    out_ch=int(feature_size*ratio**(d + 1)),
                )),
                list,
            )
        ]
    )
    self.center = DownSample(
        in_ch=feature_size*ratio**depth,
        out_ch=feature_size*ratio**depth,
    )
    self.up_layers = nn.ModuleList([
        *pipe(
            range(depth),
            reversed,
            map(lambda l: UpSample(
                in_ch=feature_size * ratio**(l+1) + feature_size*ratio**(l+1),
                out_ch=feature_size*ratio**l,
            )),
            list,
        ),
        UpSample(
            in_ch=feature_size + feature_size,
            out_ch=feature_size,
        ),
    ])
    self.out_ch = feature_size
def test_flip():
    writer = SummaryWriter(f'{config["TENSORBORAD_LOG_DIR"]}/test')
    dataset_df = load_dataset_df('/store/kaggle/tgs')
    dataset = TgsSaltDataset(dataset_df)
    writer.add_image(
        "flip",
        vutils.make_grid(
            pipe(range(8),
                 map(lambda x: dataset[12]),
                 map(lambda x: [x['image'], x['mask']]),
                 concat,
                 list)),
    )
def _post(target, methods, entities=[]):
    with DBSession() as sess:
        query_class = eval(f"qry.{target}")
        q = query_class(
            entities=pipe(entities,
                          map(lambda x: eval(f'ms.{x}')),
                          list),
            session=sess,
        )
        for m in methods:
            q = getattr(q, m['name'])(*m['args'], **m['kwargs'])
        return q
def find_unbalanced(self):
    node = self
    while True:
        grouped_children = node.grouped('children')
        unbalanced = cc.pipe(grouped_children,
                             cc.valfilter(lambda x: len(x) == 1),
                             lambda x: cc.first(x.values())[0])
        if unbalanced.children_are_balanced:
            return unbalanced
        node = unbalanced
def grouped(self, group, key=lambda x: x.weight):
    if group == 'siblings' and not self.parent:
        return {self.weight: [self]}
    elif group in {'siblings', 'children'}:
        agg = self.siblings if group == 'siblings' else self.children
        return cc.pipe(((key(x), x) for x in agg),
                       cc.groupby(lambda x: x[0]),
                       cc.valmap(lambda x: [y[1] for y in x]))
    else:
        return {}
def add_noise(batch_images, erase_num, erase_p):
    ramdom_erase = RandomErasing(
        num=erase_num
    )
    return pipe(
        batch_images,
        map(ramdom_erase),
        list,
        torch.stack
    )
def validate(x, y, epoch):
    score = pipe(
        zip(
            x.argmax(dim=1).cpu().detach().numpy(),
            y.cpu().detach().numpy()
        ),
        map(lambda x: iou(*x)),
        list,
        np.mean
    )
    return score
def test_kfold():
    output = load_train_df(
        dataset_dir='/store/tellus/train',
        output='/store/tmp/train.pqt'
    )
    df = pd.read_parquet(output)
    sets = kfold(df, n_splits=10)
    for s in sets:
        assert pipe(
            s['train_pos'],
            take(100),
            map(lambda x: x['label']),
            filter(lambda x: x == 0),
            list,
            len
        ) == 0
        assert pipe(
            s['val_pos'],
            take(100),
            map(lambda x: x['label']),
            filter(lambda x: x == 0),
            list,
            len
        ) == 0
        assert pipe(
            s['train_neg'],
            take(100),
            map(lambda x: x['label']),
            filter(lambda x: x == 1),
            list,
            len
        ) == 0
        assert pipe(
            s['val_neg'],
            take(100),
            map(lambda x: x['label']),
            filter(lambda x: x == 1),
            list,
            len
        ) == 0
        assert len(s) == 4
def __init__(self, epoch_size, len_indices, shuffle=True, start_at=0):
    self.shuffle = shuffle
    self.epoch_size = epoch_size
    self.len_indices = len_indices
    indices = range(len_indices)
    self.chunks = pipe(
        range(0, len_indices//epoch_size),
        map(lambda x: indices[x*epoch_size:(x+1)*epoch_size]),
        map(list),
        list,
    )
    self.chunk_idx = start_at
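# Standalone check of the chunking logic above (illustrative numbers; assumes
# pipe/map come from toolz.curried as elsewhere in this code): with
# len_indices=10 and epoch_size=3 the trailing partial chunk is dropped.
from toolz.curried import map, pipe

indices = range(10)
chunks = pipe(
    range(0, 10 // 3),
    map(lambda x: indices[x * 3:(x + 1) * 3]),
    map(list),
    list,
)
assert chunks == [[0, 1, 2], [3, 4, 5], [6, 7, 8]]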
def worker(pipeline, bam_fname, result_q, contig_q,
           paired=False, singles_q=None, max_singles=1000,
           is_singles_mixer=False, single_src_cnt=None):
    """Given a pipeline, run it with reads from the given bam taken from contigs
    supplied over the contig_q.

    This expects the pipeline to yield one final result which it can then return.

    It expects the last element of pipeline to be a function that consumes a read
    iterator and returns a result. This is more flexible than you think, since the
    result can be an iterator, so this can be used to filter reads in parallel. See
    examples in the filter analysis tutorial

    :param pipeline: A list of pipelines
    :param bam_fname: Source BAM file
    :param result_q: The result is put here.
    :param contig_q: messages are of the form (ref, True/False)
                     ref is the name of the contig
                     True/False indicates if eof should be set T/F
                     This controls whether we read to end of file including all the
                     unmapped reads. The caller figures out if this is that last
                     contig that sits just before that tail of unmapped reads at the
                     end of the BAM file
    :param paired: Do we pair the reads before passing them to the pipeline?
    :param singles_q: messages are SAM strings of reads converted using tostring().
                      This is only used/relevant if paired=True because we use that
                      to collect the singles from all contigs and pair them up
                      Depending on whether this is the last
    :param max_singles: When we have these many singles, start passing them to the
                        singles mixer
    :param is_singles_mixer: Set True if this is also the "singles mixer" that
                             receives unpaired reads from other workers
    :param single_src_cnt: How many sources of singles we have
                           This is
    :return:
    """
    if paired and singles_q is None:
        raise RuntimeError('Need singles_q to be defined if using paired reads')

    fp = pysam.AlignmentFile(bam_fname)
    if paired:
        t1 = paired_read_iter(fp, contig_q,
                              singles_q=singles_q, max_singles=max_singles,
                              is_singles_mixer=is_singles_mixer,
                              single_src_cnt=single_src_cnt)
    else:
        t1 = unpaired_read_iter(fp, contig_q)

    sink = pipeline[-1]
    result_q.put(sink(cyt.pipe(t1, *pipeline[:-1])))
def reformat_timestamp(given_ts):
    """Reformat into WordPress.com format"""
    # Twitter example: "Sat Oct 10 14:48:34 +0000 2015"
    # WordPress example: "2015-10-10T19:42:34Z"
    if given_ts is None:
        return ""
    try:
        return tz.pipe(
            given_ts,
            lambda x: dt.datetime.strptime(x, "%a %b %d %H:%M:%S +0000 %Y"),
            lambda x: x.strftime("%Y-%m-%dT%H:%M:%SZ"))
    except:
        # If it can't reformat it, just use the previous version
        return str(given_ts)
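# Example conversions (the first input is the Twitter-style timestamp quoted in
# the comment above; the others illustrate the fallback paths):
assert reformat_timestamp("Sat Oct 10 14:48:34 +0000 2015") == "2015-10-10T14:48:34Z"
assert reformat_timestamp(None) == ""
assert reformat_timestamp("not a timestamp") == "not a timestamp"  # str() fallback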
def connect_to_twitter_stream(stream_key, saveing_function):
    """Connect to & consume a Twitter stream"""
    stream = tz.pipe(
        ## Connect
        start_stream_twitter(),  # public sampled stream
        tz.map(print_twitter_stall_warning),
        ## Filter
        tz.filter(is_tweet),  # filter to tweets
        # tz.filter(is_user_lang_tweet(["en", "en-AU", "en-au", "en-GB", "en-gb"])),  # filter to English
        ## Parse
        tz.map(parse_tweet),  # parse into a flat dictionary
    )
    # Collect
    saveing_function(stream_key, stream)
def connect_to_twitter_filtered_stream(stream_key, saveing_function):
    """Connect to & consume a filtered Twitter stream, where Twitter does some of the filtering"""
    stream = tz.pipe(
        ## Connect
        start_stream_twitter(**CONFIG['twitter_filter']),
        tz.map(print_twitter_stall_warning),
        ## Filter
        tz.filter(is_tweet),  # filter to tweets
        ## Parse
        tz.map(parse_tweet),  # parse into a flat dictionary
    )
    ## Collect
    saveing_function(stream_key, stream)
def connect_to_wordpress_stream(stream_key, saveing_function):
    """Connect to & consume a WordPress event stream"""
    parse_functions = {
        'posts': parse_post,
        'likes': parse_like,
        'comments': parse_comment}
    stream = tz.pipe(
        ## Connect
        start_wordpress_stream(CONFIG['stream_urls'][stream_key]),
        ## Parse
        tz.map(permissive_json_load),  # parse the JSON, or return an empty dictionary
        tz.map(parse_functions[stream_key]),  # parse into a flat dictionary
    )
    # Collect
    saveing_function(stream_key, stream)
def worker(pipeline, aaf_fname, result_q, contig_q):
    """Given a pipeline, run it with reads from the given AAF taken from contigs
    supplied over the contig_q.

    This expects the pipeline to yield one final result which it can then return.

    It expects the last element of pipeline to be a function that consumes an AAF
    iterator and returns a result.

    :param pipeline: A list of pipeline nodes
    :param aaf_fname: Source AAF file
    :param result_q: The result is put here.
    :param contig_q: messages are contig names. A None indicates stop_iter
    :return:
    """
    aaf = pysam.TabixFile(aaf_fname)
    t1 = aaf_iter(aaf, contig_q)
    sink = pipeline[-1]
    result_q.put(sink(cyt.pipe(t1, *pipeline[:-1])))