Example 1
def get_odes_extracts(db, api_key):
    '''List ODES extracts for the given API key, pairing each with its database record.
    '''
    odeses, extracts = list(), list()
    
    vars = dict(api_key=api_key)
    extracts_url = uritemplate.expand(odes_extracts_url, vars)
    resp = requests.get(extracts_url)

    if resp.status_code in range(200, 300):
        odeses.extend([data.ODES(str(oj['id']), status=oj['status'], bbox=oj['bbox'],
                                 links=oj.get('download_links', {}),
                                 processed_at=(parse_datetime(oj['processed_at']) if oj['processed_at'] else None),
                                 created_at=(parse_datetime(oj['created_at']) if oj['created_at'] else None))
                       for oj in resp.json()])
    
    for odes in sorted(odeses, key=attrgetter('created_at'), reverse=True):
        extract = data.get_extract(db, odes=odes)
        
        if extract is None:
            extract = data.Extract(None, None, None, odes, None, None, None)
        
        extracts.append(extract)

    return extracts
Example 2
    def from_json(json_str):
        verbatim_tweet = VerbatimTweet()
        verbatim_tweet.json = json_str
        verbatim_tweet.save()

        obj = json.loads(json_str)
        tweet = Tweet()
        tweet.verbatim_tweet = verbatim_tweet
        tweet.id = obj['id']
        tweet.created_at = parse_datetime(obj['created_at'])
        for field in ['lang', 'retweeted', 'retweet_count',
                      'text', 'truncated']:
            setattr(tweet, field, obj.get(field))

        user_obj = obj['user']
        user_created_at = parse_datetime(user_obj['created_at'])
        (user, created) = TwitterUser.objects.get_or_create(id=user_obj['id'],
                                                            defaults={'created_at': user_created_at})
        if created:
            user.name = user_obj.get('name')
            user.screen_name = user_obj.get('screen_name')
            user.verified = user_obj.get('verified')
            user.save()
        tweet.user = user

        tweet.save()
        return tweet
Example 3
    def parse_practices(self, response):
        # body > div.container > div.page-body > table > tbody
        practices = response.xpath('/html/body/div[2]/div[3]/table/tbody/tr')
        total = len(practices)

        for i, row in enumerate(practices):
            cells = [x.strip() for x in row.css('td::text').extract() if x.strip() != '']

            r = PracticeSession()
            r['student'] = self.username
            r['quiz_index'] = total-i

            # attempt to see if the date in parentheses is more specific
            # than the month-day specifier (e.g. 'hours ago'), and use it if so.
            # otherwise, just use the month-day specifier
            try:
                inner_date = row.css('td:first-child small::text').extract()[0]
                inner_date_parsed = VeritasScraper.ddp.get_date_data(inner_date)
                r['taken_on'] = inner_date_parsed['date_obj'] \
                    if inner_date_parsed and "day" not in inner_date \
                    else parse_datetime(cells[0])
            except IndexError:
                r['taken_on'] = parse_datetime(cells[0])

            r['question_count'] = int(cells[1])
            r['percent_correct'] = cells[2]
            r['duration'] = cells[3]

            yield r
Example 4
    def test_children_metadata(self):
        path = u'kind/of/magíc.mp3'
        record = recursively_create_file(self.node_settings, path)
        version = factories.FileVersionFactory()
        record.versions.add(version)
        record.save()
        res = self.send_hook(
            'osfstorage_get_children',
            {'fid': record.parent._id},
            {},
        )
        assert_equal(len(res.json), 1)
        res_data = res.json[0]
        expected_data = record.serialize()

        # Datetimes in response might not be exactly the same as in record.serialize
        # because of the way Postgres serializes dates. For example,
        # '2017-06-05T17:32:20.964950+00:00' will be
        # serialized as '2017-06-05T17:32:20.96495+00:00' by postgres
        # Therefore, we parse the dates then compare them
        expected_date_modified = parse_datetime(expected_data.pop('modified'))
        expected_date_created = parse_datetime(expected_data.pop('created'))

        res_date_modified = parse_datetime(res_data.pop('modified'))
        res_date_created = parse_datetime(res_data.pop('created'))

        assert_equal(res_date_modified, expected_date_modified)
        assert_equal(res_date_created, expected_date_created)
        assert_equal(res_data, expected_data)
Example 5
def filter_logic(unlisted, timeSelect, startDate, endDate,
                 timeTypeSelect, cvssSelect, cvss, rejectedSelect, limit, skip):
    query = []
    # retrieving lists
    if rejectedSelect == "hide":
        exp = "^(?!\*\* REJECT \*\*\s+DO NOT USE THIS CANDIDATE NUMBER.*)"
        query.append({'summary': re.compile(exp)})
    # cvss logic
    if cvssSelect != "all":
        if cvssSelect == "above":
            query.append({'cvss': {'$gt': float(cvss)}})
        if cvssSelect == "equals":
            query.append({'cvss': float(cvss)})
        if cvssSelect == "below":
            query.append({'cvss': {'$lt': float(cvss)}})
    # date logic
    if timeSelect != "all":
        startDate = parse_datetime(startDate, ignoretz=True, dayfirst=True)
        endDate   = parse_datetime(endDate,   ignoretz=True, dayfirst=True)
        if timeSelect == "from":
            query.append({timeTypeSelect: {'$gt': startDate}})
        if timeSelect == "until":
            query.append({timeTypeSelect: {'$lt': endDate}})
        if timeSelect == "between":
            query.append({timeTypeSelect: {'$gt': startDate, '$lt': endDate}})
        if timeSelect == "outside":
            query.append({'$or': [{timeTypeSelect: {'$lt': startDate}}, {timeTypeSelect: {'$gt': endDate}}]})
    return dbLayer.getCVEs(limit=limit, skip=skip, query=query)
Example 6
def get_odes_extract(db, id, api_key):
    '''Return the extract with the given id, consulting the ODES API when it is not in the database.
    '''
    extract, odes = data.get_extract(db, extract_id=id), None
    
    if extract is None:
        # Nothing by that name in the database, so ask the ODES API.
        vars = dict(id=id, api_key=api_key)
        extract_url = uritemplate.expand(odes_extracts_url, vars)
        resp = requests.get(extract_url)

        if resp.status_code in range(200, 300):
            oj = resp.json()
            odes = data.ODES(str(oj['id']), status=oj['status'], bbox=oj['bbox'],
                             links=oj.get('download_links', {}),
                             processed_at=(parse_datetime(oj['processed_at']) if oj['processed_at'] else None),
                             created_at=(parse_datetime(oj['created_at']) if oj['created_at'] else None))
    
        if odes is None:
            # Nothing at all for this ID anywhere.
            return None
    
    if odes is None:
        # A DB extract was found, but nothing in ODES - very weird!
        return get_odes_extract(db, extract.odes.id, api_key)
    
    # We have a known ODES, so look for it in the database.
    extract = data.get_extract(db, odes=odes)
    
    if extract is None:
        # Known ODES, but nothing in the DB so make one up.
        return data.Extract(None, None, None, odes, None, None, None)
    
    return extract
Example 7
    def find_timing_env_commands(self):
        content = self.get_timing_env_log_content()
        if not content: return [], set(['Timing env log is not found!'])

        commands, ordered_commands, errors = {}, [], set()
        for line in content.split('\n'):
            try:
                data = json.loads(line)
            except ValueError:
                # Skip lines that are not valid JSON.
                continue

            cmd_hash_key = (data['command'],data['unique_nr'])
            if cmd_hash_key not in commands and data['tag'] == 'BEGIN':
                commands[cmd_hash_key] = data
                commands[cmd_hash_key]['tags'] = [data['tag']]
                ordered_commands.append(commands[cmd_hash_key])

            elif cmd_hash_key in commands and data['tag'] in commands[cmd_hash_key]['tags']:
                errors.add('Found duplicated command: {}'.format(data['command']))

            elif cmd_hash_key in commands and data['tag'] == 'END':
                commands[cmd_hash_key].update(data)
                commands[cmd_hash_key]['tags'].append(data['tag'])

            else:
                errors.add('Unknown error: {}'.format(data['command']))

        for command in ordered_commands:
            command['started_at'] = parse_datetime(command['started_at'])
            if 'finished_at' in command: command['finished_at'] = parse_datetime(command['finished_at'])

        return ordered_commands, errors
Example 8
def validate_datetime(dt_str):
    try:
        parse_datetime(dt_str)
    except ValueError as exc:
        return str(exc)
    except TypeError:
        return _("Invalid input.")
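
A small, hypothetical usage sketch (assuming the validator above and its `_` gettext helper are importable); `None` means the value parsed cleanly:

for raw in ("2021-03-01 12:00", "definitely not a date", None):
    error = validate_datetime(raw)
    print(repr(raw), "->", error if error else "ok")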
Example 9
  def generate_minimal_query(self, f):
    query = []
    # retrieving lists
    if f['rejectedSelect'] == "hide":
      exp = "^(?!\*\* REJECT \*\*\s+DO NOT USE THIS CANDIDATE NUMBER.*)"
      query.append({'summary': re.compile(exp)})

    # cvss logic
    if   f['cvssSelect'] == "above":  query.append({'cvss': {'$gt': float(f['cvss'])}})
    elif f['cvssSelect'] == "equals": query.append({'cvss': float(f['cvss'])})
    elif f['cvssSelect'] == "below":  query.append({'cvss': {'$lt': float(f['cvss'])}})

    # date logic
    if f['timeSelect'] != "all":
      if f['startDate']:
        startDate = parse_datetime(f['startDate'], ignoretz=True, dayfirst=True)
      if f['endDate']:
        endDate   = parse_datetime(f['endDate'],   ignoretz=True, dayfirst=True)

      if   f['timeSelect'] == "from":
        query.append({f['timeTypeSelect']: {'$gt': startDate}})
      elif f['timeSelect'] == "until":
        query.append({f['timeTypeSelect']: {'$lt': endDate}})
      elif f['timeSelect'] == "between":
        query.append({f['timeTypeSelect']: {'$gt': startDate, '$lt': endDate}})
      elif f['timeSelect'] == "outside":
        query.append({'$or': [{f['timeTypeSelect']: {'$lt': startDate}}, {f['timeTypeSelect']: {'$gt': endDate}}]})
    return query
Example 10
def test_contributors_get_aware_datetime():
    """Get an aware datetime from a valid string."""
    iso_datetime = make_aware(parse_datetime("2016-01-24T23:15:22+0000"),
                              tz=pytz.utc)

    # Test ISO 8601 datetime.
    assert iso_datetime == get_aware_datetime("2016-01-24T23:15:22+0000",
                                              tz=pytz.utc)

    # Test git-like datetime.
    assert iso_datetime == get_aware_datetime("2016-01-24 23:15:22 +0000",
                                              tz=pytz.utc)

    # Test just an ISO 8601 date.
    iso_datetime = make_aware(parse_datetime("2016-01-24T00:00:00+0000"),
                              tz=pytz.utc)
    assert iso_datetime == get_aware_datetime("2016-01-24", tz=pytz.utc)

    # Test None.
    assert get_aware_datetime(None) is None

    # Test empty string.
    assert get_aware_datetime("") is None

    # Test non-empty string.
    with pytest.raises(ArgumentTypeError):
        get_aware_datetime("THIS FAILS")

    # Test blank string.
    with pytest.raises(ArgumentTypeError):
        get_aware_datetime(" ")
Example 11
def request_odes_extract(extract, request, url_for, api_key):
    '''
    '''
    env = Environment(loader=PackageLoader(__name__, 'templates'))
    args = dict(
        name = extract.name or extract.wof.name or 'an unnamed place',
        link = urljoin(util.get_base_url(request), url_for('ODES.get_extract', extract_id=extract.id)),
        extracts_link = urljoin(util.get_base_url(request), url_for('ODES.get_extracts')),
        created = extract.created
        )

    email = dict(
        email_subject=env.get_template('email-subject.txt').render(**args),
        email_body_text=env.get_template('email-body.txt').render(**args),
        email_body_html=env.get_template('email-body.html').render(**args)
        )

    params = {key: extract.envelope.bbox[i] for (i, key) in enumerate(('bbox_w', 'bbox_s', 'bbox_e', 'bbox_n'))}
    params.update(email)

    post_url = uritemplate.expand(odes_extracts_url, dict(api_key=api_key))
    resp = requests.post(post_url, data=params)
    oj = resp.json()
    
    if 'error' in oj:
        raise util.KnownUnknown("Error: {}".format(oj['error']))
    elif resp.status_code != 200:
        raise Exception("Bad ODES status code: {}".format(resp.status_code))
    
    return data.ODES(str(oj['id']), status=oj['status'], bbox=oj['bbox'],
                     links=oj.get('download_links', {}),
                     processed_at=(parse_datetime(oj['processed_at']) if oj['processed_at'] else None),
                     created_at=(parse_datetime(oj['created_at']) if oj['created_at'] else None))
Example 12
 def entry_to_event_tuple(entry):
     title = entry.title.text
     description = entry.content.text or '-'
     description = description.splitlines()[0]
     starts_at = parse_datetime(entry.when[0].start)
     ends_at = parse_datetime(entry.when[0].end)
     # total_seconds() also counts whole days, unlike timedelta.seconds
     duration = (ends_at - starts_at).total_seconds() / 3600.0
     return (title, description, duration)
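
Side note on the fix above: `timedelta.seconds` only carries the sub-day remainder, while `total_seconds()` covers the whole span, which matters for events longer than a day:

from datetime import timedelta

span = timedelta(days=1, hours=2)
print(span.seconds / 3600.0)          # 2.0  (sub-day component only)
print(span.total_seconds() / 3600.0)  # 26.0 (full duration in hours)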
Example 13
 def from_dict(cls, obj):
     return cls(
         obj['summary'],
         obj['guid'],
         # localize() applies the correct EST/EDT offset; replace(tzinfo=...) would use pytz's raw LMT offset
         pytz.timezone("America/New_York").localize(parse_datetime(obj['start']['utcdate'])).astimezone(pytz.utc),
         pytz.timezone("America/New_York").localize(parse_datetime(obj['end']['utcdate'])).astimezone(pytz.utc),
         obj['start']['allday'] == 'true'
     )
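
Why `localize` rather than `replace(tzinfo=...)` in the snippet above: a pytz zone attached via `replace` carries the zone's historical LMT offset instead of the correct EST/EDT offset. A quick standalone check:

from datetime import datetime

import pytz

ny = pytz.timezone("America/New_York")
naive = datetime(2020, 6, 1, 12, 0, 0)

print(naive.replace(tzinfo=ny).isoformat())  # 2020-06-01T12:00:00-04:56 (LMT)
print(ny.localize(naive).isoformat())        # 2020-06-01T12:00:00-04:00 (EDT)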
Example 14
def filter_logic(f, limit, skip):
    query = []
    # retrieving lists
    if f['blacklistSelect'] == "on":
        regexes = db.getRules('blacklist')
        if len(regexes) != 0:
            exp = "^(?!" + "|".join(regexes) + ")"
            query.append({'$or': [{'vulnerable_configuration': re.compile(exp)},
                                  {'vulnerable_configuration': {'$exists': False}},
                                  {'vulnerable_configuration': []}
                                  ]})
    if f['whitelistSelect'] == "hide":
        regexes = db.getRules('whitelist')
        if len(regexes) != 0:
            exp = "^(?!" + "|".join(regexes) + ")"
            query.append({'$or': [{'vulnerable_configuration': re.compile(exp)},
                                  {'vulnerable_configuration': {'$exists': False}},
                                  {'vulnerable_configuration': []}
                                  ]})
    if f['unlistedSelect'] == "hide":
        wlregexes = compile(db.getRules('whitelist'))
        blregexes = compile(db.getRules('blacklist'))
        query.append({'$or': [{'vulnerable_configuration': {'$in': wlregexes}},
                              {'vulnerable_configuration': {'$in': blregexes}}]})
    if f['rejectedSelect'] == "hide":
        exp = "^(?!\*\* REJECT \*\*\s+DO NOT USE THIS CANDIDATE NUMBER.*)"
        query.append({'summary': re.compile(exp)})

    # plugin filters
    query.extend(plugManager.doFilter(f, **pluginArgs()))

    # cvss logic
    if f['cvssSelect'] == "above":    query.append({'cvss': {'$gt': float(f['cvss'])}})
    elif f['cvssSelect'] == "equals": query.append({'cvss': float(f['cvss'])})
    elif f['cvssSelect'] == "below":  query.append({'cvss': {'$lt': float(f['cvss'])}})

    # date logic
    if f['timeSelect'] != "all":
        if f['startDate']:
            startDate = parse_datetime(f['startDate'], ignoretz=True, dayfirst=True)
        if f['endDate']:
            endDate   = parse_datetime(f['endDate'],   ignoretz=True, dayfirst=True)

        if f['timeSelect'] == "from":
            query.append({f['timeTypeSelect']: {'$gt': startDate}})
        if f['timeSelect'] == "until":
            query.append({f['timeTypeSelect']: {'$lt': endDate}})
        if f['timeSelect'] == "between":
            query.append({f['timeTypeSelect']: {'$gt': startDate, '$lt': endDate}})
        if f['timeSelect'] == "outside":
            query.append({'$or': [{f['timeTypeSelect']: {'$lt': startDate}}, {f['timeTypeSelect']: {'$gt': endDate}}]})
    cve = db.getCVEs(limit=limit, skip=skip, query=query)
    # marking relevant records
    if f['whitelistSelect'] == "on":   cve = whitelist_mark(cve)
    if f['blacklistSelect'] == "mark": cve = blacklist_mark(cve)
    plugManager.mark(cve, **pluginArgs())
    cve = list(cve)
    return cve
Example 15
 def _assert_is_datetime(self, timestamp):
     if not timestamp:
         return False
     try:
         parse_datetime(timestamp)
     except ValueError:
         return False
     else:
         return True
Example 16
 def from_line(cls, line, lineidx=None):
     m = CLOCK_RE.match(line)
     start = m.group("start")
     end = m.group("end")
     if start:
         start = parse_datetime(m.group("start"), fuzzy=True)
     if end:
         end = parse_datetime(m.group("end"), fuzzy=True)
     return cls(start=start, end=end, lineidx=lineidx)
Example 17
    def get_group_tag_keys_and_top_values(
            self, project_id, group_id, environment_ids, user=None, keys=None, value_limit=TOP_VALUES_DEFAULT_LIMIT):
        # Similar to __get_tag_key_and_top_values except we get the top values
        # for all the keys provided. value_limit in this case means the number
        # of top values for each key, so the total rows returned should be
        # num_keys * limit.
        start, end = self.get_time_range()

        # First get totals and unique counts by key.
        keys_with_counts = self.get_group_tag_keys(project_id, group_id, environment_ids, keys=keys)

        # Then get the top values with first_seen/last_seen/count for each
        filters = {
            'project_id': [project_id],
        }
        if environment_ids:
            filters['environment'] = environment_ids
        if keys is not None:
            filters['tags_key'] = keys
        if group_id is not None:
            filters['issue'] = [group_id]

        aggregations = [
            ['count()', '', 'count'],
            ['min', SEEN_COLUMN, 'first_seen'],
            ['max', SEEN_COLUMN, 'last_seen'],
        ]
        conditions = [['tags_key', 'NOT IN', self.EXCLUDE_TAG_KEYS]]

        values_by_key = snuba.query(
            start, end, ['tags_key', 'tags_value'], conditions, filters, aggregations,
            orderby='-count', limitby=[value_limit, 'tags_key'],
            referrer='tagstore.__get_tag_keys_and_top_values'
        )

        # Then supplement the key objects with the top values for each.
        if group_id is None:
            value_ctor = TagValue
        else:
            value_ctor = functools.partial(GroupTagValue, group_id=group_id)

        for keyobj in keys_with_counts:
            key = keyobj.key
            values = values_by_key.get(key, [])
            keyobj.top_values = [
                value_ctor(
                    key=keyobj.key,
                    value=value,
                    times_seen=data['count'],
                    first_seen=parse_datetime(data['first_seen']),
                    last_seen=parse_datetime(data['last_seen']),
                ) for value, data in six.iteritems(values)
            ]

        return keys_with_counts
Example 18
 def _assert_is_datetime(self, timestamp):
     """
     Internal helper to validate the type of the provided timestamp
     """
     if not timestamp:
         return False
     try:
         parse_datetime(timestamp)
     except ValueError:
         return False
     else:
         return True
Example 19
 def from_dict(cls, obj):
     return cls(
         obj["summary"],
         obj["guid"],
         parse_datetime(obj["start"]["utcdate"])
         .replace(tzinfo=pytz.timezone("America/New_York"))
         .astimezone(pytz.utc),
         parse_datetime(obj["end"]["utcdate"])
         .replace(tzinfo=pytz.timezone("America/New_York"))
         .astimezone(pytz.utc),
         obj["start"]["allday"] == "true",
     )
Example 20
def _parse_args(arguments):
    if arguments['--start'] is not None:
        start_datetime = _make_utc(parse_datetime(arguments['--start']))
    else:
        start_datetime = datetime.datetime.now(pytz.UTC).replace(
            second=0, microsecond=0)

    if arguments['--end'] is not None:
        end_datetime = _make_utc(parse_datetime(arguments['--end']))
    else:
        end_datetime = start_datetime + datetime.timedelta(minutes=30)
    return arguments['--constituents'], start_datetime, end_datetime
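
`_make_utc` is not shown in this listing; a plausible sketch (an assumption based on how it is called, not the project's actual helper) would normalise both naive and aware parsed values to UTC:

import pytz


def _make_utc(dt):
    # Hypothetical helper: treat naive datetimes as UTC, convert aware ones.
    if dt.tzinfo is None:
        return pytz.UTC.localize(dt)
    return dt.astimezone(pytz.UTC)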
Example 21
    def __get_tag_key_and_top_values(self, project_id, group_id, environment_id,
                                     key, limit=3, raise_on_empty=True):
        start, end = self.get_time_range()
        tag = u'tags[{}]'.format(key)
        filters = {
            'project_id': [project_id],
        }
        if environment_id:
            filters['environment'] = [environment_id]
        if group_id is not None:
            filters['issue'] = [group_id]
        conditions = [[tag, '!=', '']]
        aggregations = [
            ['uniq', tag, 'values_seen'],
            ['count()', '', 'count'],
            ['min', SEEN_COLUMN, 'first_seen'],
            ['max', SEEN_COLUMN, 'last_seen'],
        ]

        result, totals = snuba.query(
            start, end, [tag], conditions, filters, aggregations,
            orderby='-count', limit=limit, totals=True,
            referrer='tagstore.__get_tag_key_and_top_values'
        )

        if raise_on_empty and (not result or totals.get('count', 0) == 0):
            raise TagKeyNotFound if group_id is None else GroupTagKeyNotFound
        else:
            if group_id is None:
                key_ctor = TagKey
                value_ctor = TagValue
            else:
                key_ctor = functools.partial(GroupTagKey, group_id=group_id)
                value_ctor = functools.partial(GroupTagValue, group_id=group_id)

            top_values = [
                value_ctor(
                    key=key,
                    value=value,
                    times_seen=data['count'],
                    first_seen=parse_datetime(data['first_seen']),
                    last_seen=parse_datetime(data['last_seen']),
                ) for value, data in six.iteritems(result)
            ]

            return key_ctor(
                key=key,
                values_seen=totals.get('values_seen', 0),
                count=totals.get('count', 0),
                top_values=top_values
            )
Example 22
    def search(self, search_value, search_key=None):
        """
        Searches and returns a ShipmentTrack instance for the given
        keyword. If nothing found, raises :class:`NoMatchFound`
        exception.
        """
        url = self.config.get_url(search_value, search_key)
        resource_as_xml = urllib.urlopen(url).read()
        resource_as_json = xml2json.xml2json(resource_as_xml)
        resource_as_dict = json.loads(resource_as_json)

        if not isinstance(resource_as_dict["SHIPMENTTRACK"]["SHIPMENTREPORT"], dict):
            raise NoMatchFound("No match found for %s" % (search_value))

        st = ShipmentTrack(
            SEARCHON=parse_datetime(resource_as_dict["SHIPMENTTRACK"]["SEARCHON"]),
            AWB=resource_as_dict["SHIPMENTTRACK"]["SHIPMENTREPORT"]["AWB"],
            CONSIGNEENAME=resource_as_dict["SHIPMENTTRACK"]["SHIPMENTREPORT"]["CONSIGNEENAME"],
            CONSIGNORNAME=resource_as_dict["SHIPMENTTRACK"]["SHIPMENTREPORT"]["CONSIGNORNAME"],
            CURRENTSTATUS=resource_as_dict["SHIPMENTTRACK"]["SHIPMENTREPORT"]["CURRENTSTATUS"],
            DESTINATION=resource_as_dict["SHIPMENTTRACK"]["SHIPMENTREPORT"]["DESTINATION"],
            ORIGIN=resource_as_dict["SHIPMENTTRACK"]["SHIPMENTREPORT"]["ORIGIN"],
            PICKUPDATE=parse_datetime(resource_as_dict["SHIPMENTTRACK"]["SHIPMENTREPORT"]["PICKUPDATE"]),
            SHIPMENTREFERENCENUMBER=resource_as_dict["SHIPMENTTRACK"]["SHIPMENTREPORT"]["SHIPMENTREFERENCENUMBER"],
            TOTALWEIGHT=Decimal(resource_as_dict["SHIPMENTTRACK"]["SHIPMENTREPORT"]["TOTALWEIGHT"]))

        cps_raw = resource_as_dict["SHIPMENTTRACK"]["SHIPMENTREPORT"]["CHECKPOINTDETAILS"]["CHECKPOINTS"]
        cps_tba = []
        if isinstance(cps_raw, list):
            for cp in cps_raw:
                cps_tba.append(ShipmentTrackCP(
                    CHECKDATE=cp["CHECKDATE"],
                    CHECKPOINT=cp["CHECKPOINT"],
                    CHECKPOINTDESCRIPTION=cp["CHECKPOINTDESCRIPTION"],
                    CHECKTIME=cp["CHECKTIME"],
                    LOCATIONNAME=cp["LOCATIONNAME"],
                    CHECKDATETIME=parse_datetime("%s %s" % (cp["CHECKDATE"], cp["CHECKTIME"]))))
        else:
            cp = cps_raw
            cps_tba.append(ShipmentTrackCP(
                CHECKDATE=cp["CHECKDATE"],
                CHECKPOINT=cp["CHECKPOINT"],
                CHECKPOINTDESCRIPTION=cp["CHECKPOINTDESCRIPTION"],
                CHECKTIME=cp["CHECKTIME"],
                LOCATIONNAME=cp["LOCATIONNAME"],
                CHECKDATETIME=parse_datetime("%s %s" % (cp["CHECKDATE"], cp["CHECKTIME"]))))
        cps_tba = sorted(cps_tba, key=lambda x: x["CHECKDATETIME"])
        for cp in cps_tba:
            st.add_cp(cp)
        return st
Example 23
    def __init__(self, **kwargs):
        self.key = kwargs.get('key')

        if 'expiration' in kwargs:
            self.expiration = parse_datetime(kwargs['expiration'])
        else:
            self.expiration = None
Example 24
    def update(self, oauth_token):
        logging.debug("Updating issue %i" % self.number)
        # Record basic information about this pull request
        issue_response = raw_github_request(PULLS_BASE + '/%i' % self.number,
                                            oauth_token=oauth_token, etag=self.etag)
        if issue_response is None:
            logging.debug("Issue %i hasn't changed since last visit; skipping" % self.number)
            return
        self.pr_json = json.loads(issue_response.content)
        self.etag = issue_response.headers["ETag"]
        updated_at = \
            parse_datetime(self.pr_json['updated_at']).astimezone(tz.tzutc()).replace(tzinfo=None)
        self.user = self.pr_json['user']['login']
        self.updated_at = updated_at
        self.state = self.pr_json['state']

        # TODO: will miss comments if we exceed the pagination limit:
        comments_response = raw_github_request(ISSUES_BASE + '/%i/comments' % self.number,
                                               oauth_token=oauth_token, etag=self.comments_etag)
        if comments_response is not None:
            self.comments_json = json.loads(comments_response.content)
            self.comments_etag = comments_response.headers["ETag"]

        files_response = raw_github_request(PULLS_BASE + "/%i/files" % self.number,
                                            oauth_token=oauth_token, etag=self.files_etag)
        if files_response is not None:
            self.files_json = json.loads(files_response.content)
            self.files_etag = files_response.headers["ETag"]

        self.cached_last_jenkins_outcome = None
        self.last_jenkins_outcome  # force recomputation of Jenkins outcome
        self.cached_commenters = self._compute_commenters()

        # Write our modifications back to the database
        self.put()
Example 25
    def test_releases_request(self):
        now = parse_datetime('2018-03-09T01:00:00Z')
        project = self.create_project()
        release = Release.objects.create(
            organization_id=self.organization.id,
            version='version X',
            date_added=now,
        )
        release.add_project(project)
        dts = [now + timedelta(hours=i) for i in range(4)]

        with responses.RequestsMock() as rsps:
            def snuba_response(request):
                body = json.loads(request.body)
                assert body['aggregations'] == [['count()', None, 'aggregate']]
                assert body['project'] == [project.id]
                assert body['groupby'] == ['release', 'time']
                assert ['release', 'IN', ['version X']] in body['conditions']
                return (200, {}, json.dumps({
                    'data': [{'release': 'version X', 'time': '2018-03-09T01:00:00Z', 'aggregate': 100}],
                    'meta': [{'name': 'release'}, {'name': 'time'}, {'name': 'aggregate'}]
                }))

            rsps.add_callback(
                responses.POST,
                settings.SENTRY_SNUBA + '/query',
                callback=snuba_response)
            results = self.db.get_range(
                TSDBModel.release, [release.id], dts[0], dts[-1], rollup=3600)
            assert results == {
                release.id: [
                    (int(to_timestamp(d)), 100 if d == now else 0)
                    for d in dts]
            }
Example 26
    def test_groups_request(self):
        now = parse_datetime('2018-03-09T01:00:00Z')
        dts = [now + timedelta(hours=i) for i in range(4)]
        project = self.create_project()
        group = self.create_group(project=project)
        GroupHash.objects.create(project=project, group=group, hash='0' * 32)
        group2 = self.create_group(project=project)
        GroupHash.objects.create(project=project, group=group2, hash='1' * 32)

        with responses.RequestsMock() as rsps:
            def snuba_response(request):
                body = json.loads(request.body)
                assert body['aggregations'] == [['count()', None, 'aggregate']]
                assert body['project'] == [project.id]
                assert body['groupby'] == ['issue', 'time']

                # Assert issue->hash map is generated, but only for referenced issues
                assert [group.id, ['0' * 32]] in body['issues']
                assert [group2.id, ['1' * 32]] not in body['issues']

                return (200, {}, json.dumps({
                    'data': [{'time': '2018-03-09T01:00:00Z', 'issue': 1, 'aggregate': 100}],
                    'meta': [{'name': 'time'}, {'name': 'issue'}, {'name': 'aggregate'}]
                }))

            rsps.add_callback(
                responses.POST,
                settings.SENTRY_SNUBA + '/query',
                callback=snuba_response)
            results = self.db.get_range(TSDBModel.group, [group.id], dts[0], dts[-1])
            assert results is not None
Example 27
def to_python(obj,
    in_dict,
    str_keys=None,
    date_keys=None,
    int_keys=None,
    object_map=None,
    bool_keys=None,
    dict_keys=None,
    **kwargs):
    """Extends a given object for API Consumption.

    :param obj: Object to extend.
    :param in_dict: Dict to extract data from.
    :param str_keys: List of in_dict keys that will be extracted as strings.
    :param date_keys: List of in_dict keys that will be extracted as datetimes.
    :param object_map: Dict of {key, obj} map, for nested object results.
    """

    d = dict()

    if str_keys:
        for in_key in str_keys:
            d[in_key] = in_dict.get(in_key)

    if date_keys:
        for in_key in date_keys:
            in_date = in_dict.get(in_key)
            try:
                out_date = parse_datetime(in_date)
            except (TypeError, ValueError):
                # Fall back to None when the value is missing or unparseable.
                out_date = None

            d[in_key] = out_date
Example 28
def to_api(in_dict, int_keys=None, date_keys=None, bool_keys=None):
    """Extends a given object for API Production."""

    # Cast all int_keys to int()
    if int_keys:
        for in_key in int_keys:
            if (in_key in in_dict) and (in_dict.get(in_key, None) is not None):
                in_dict[in_key] = int(in_dict[in_key])

    # Cast all date_keys to datetime.isoformat
    if date_keys:
        for in_key in date_keys:
            if (in_key in in_dict) and (in_dict.get(in_key, None) is not None):

                _from = in_dict[in_key]

                if isinstance(_from, basestring):
                    dtime = parse_datetime(_from)

                elif isinstance(_from, datetime):
                    dtime = _from

                in_dict[in_key] = dtime.isoformat()

            elif (in_key in in_dict) and in_dict.get(in_key, None) is None:
                del in_dict[in_key]

    # Remove all Nones
    for k, v in in_dict.items():
        if v is None:
            del in_dict[k]

    return in_dict
Example 29
    def test_environment_request(self):
        now = parse_datetime('2018-03-09T01:00:00Z')
        project = self.create_project()
        env = self.create_environment(project=project, name="prod")
        dts = [now + timedelta(hours=i) for i in range(4)]

        with responses.RequestsMock() as rsps:
            def snuba_response(request):
                body = json.loads(request.body)
                assert body['aggregations'] == [['count()', None, 'aggregate']]
                assert body['project'] == [project.id]
                assert body['groupby'] == ['project_id', 'time']
                assert ['environment', 'IN', ['prod']] in body['conditions']
                return (200, {}, json.dumps({
                    'data': [{'project_id': project.id, 'time': '2018-03-09T01:00:00Z', 'aggregate': 100}],
                    'meta': [{'name': 'project_id'}, {'name': 'time'}, {'name': 'aggregate'}]
                }))

            rsps.add_callback(
                responses.POST,
                settings.SENTRY_SNUBA + '/query',
                callback=snuba_response)
            results = self.db.get_range(TSDBModel.project, [project.id],
                                        dts[0], dts[-1], environment_id=env.id, rollup=3600)
            assert results == {
                project.id: [
                    (int(to_timestamp(d)), 100 if d == now else 0)
                    for d in dts]
            }
Example 30
def strings_to_dates(model, dictionary):
    """Returns a new dictionary with all the mappings of `dictionary` but
    with date strings mapped to :class:`datetime.datetime` objects.

    The keys of `dictionary` are names of fields in the model specified in the
    constructor of this class. The values are values to set on these fields. If
    a field name corresponds to a field in the model which is a
    :class:`sqlalchemy.types.Date` or :class:`sqlalchemy.types.DateTime`, then
    the returned dictionary will have the corresponding
    :class:`datetime.datetime` Python object as the value of that mapping in
    place of the string.

    This function outputs a new dictionary; it does not modify the argument.

    """
    result = {}
    for fieldname, value in dictionary.iteritems():
        if is_date_field(model, fieldname) and value is not None:
            if value.strip() == '':
                result[fieldname] = None
            else:
                result[fieldname] = parse_datetime(value)
        else:
            result[fieldname] = value
    return result
Example 31
    def validate_query(query, request, user):
        """
        Validate custom data input

        Confirms that the uploaded file is a valid CSV or tab file and, if so, returns
        some metadata.

        :param dict query:  Query parameters, from client-side.
        :param request:  Flask request
        :param User user:  User object of user who has submitted the query
        :return dict:  Safe query parameters
        """

        # do we have an uploaded file?
        if "option-data_upload" not in request.files:
            raise QueryParametersException("No file was offered for upload.")

        file = request.files["option-data_upload"]
        if not file:
            raise QueryParametersException("No file was offered for upload.")

        encoding = sniff_encoding(file)

        wrapped_file = io.TextIOWrapper(file, encoding=encoding)
        sample = wrapped_file.read(1024 * 1024)
        wrapped_file.seek(0)
        dialect = csv.Sniffer().sniff(sample, delimiters=(",", ";", "\t"))

        # With validated csvs, save as is but make sure the raw file is sorted
        reader = csv.DictReader(wrapped_file, dialect=dialect)

        try:
            fields = reader.fieldnames
        except UnicodeDecodeError:
            raise QueryParametersException(
                "Uploaded file is not a well-formed CSV or TAB file.")

        # check if all required fields are present
        required = ("id", "thread_id", "subject", "author", "body",
                    "timestamp")
        missing = []
        for field in required:
            if field not in reader.fieldnames:
                missing.append(field)

        if missing:
            raise QueryParametersException(
                "The following required columns are not present in the csv file: %s"
                % ", ".join(missing))

        try:
            row = reader.__next__()
            try:
                parse_datetime(row["timestamp"])
            except ValueError:
                raise QueryParametersException(
                    "Your 'timestamp' column does not use a recognisable format (yyyy-mm-dd hh:mm:ss is recommended)"
                )
        except StopIteration:
            pass

        wrapped_file.detach()

        # Whether to strip the HTML tags
        strip_html = False
        if query.get("strip_html"):
            strip_html = True

        # return metadata - the filename is sanitised and serves no purpose at
        # this point in time, but can be used to uniquely identify a dataset
        disallowed_characters = re.compile(r"[^a-zA-Z0-9._+-]")
        return {
            "filename": disallowed_characters.sub("", file.filename),
            "time": time.time(),
            "datasource": "custom",
            "board": "upload",
            "strip_html": strip_html
        }
Example 32
    def test_result_shape(self):
        """
        Tests that the results from the different TSDB methods have the
        expected format.
        """
        now = parse_datetime('2018-03-09T01:00:00Z')
        project_id = 194503
        dts = [now + timedelta(hours=i) for i in range(4)]

        with responses.RequestsMock() as rsps:

            def snuba_response(request):
                body = json.loads(request.body)
                aggs = body.get('aggregations', [])
                meta = [{
                    'name': col
                } for col in body['groupby'] + [a[2] for a in aggs]]
                datum = {col['name']: 1 for col in meta}
                datum['project_id'] = project_id
                if 'time' in datum:
                    datum['time'] = '2018-03-09T01:00:00Z'
                for agg in aggs:
                    if agg[0].startswith('topK'):
                        datum[agg[2]] = [99]
                return (200, {}, json.dumps({'data': [datum], 'meta': meta}))

            rsps.add_callback(responses.POST,
                              settings.SENTRY_SNUBA + '/query',
                              callback=snuba_response)

            results = self.db.get_most_frequent(
                TSDBModel.frequent_issues_by_project, [project_id], dts[0],
                dts[0])
            assert has_shape(results, {1: [(1, 1.0)]})

            results = self.db.get_most_frequent_series(
                TSDBModel.frequent_issues_by_project, [project_id], dts[0],
                dts[0])
            assert has_shape(results, {1: [(1, {1: 1.0})]})

            items = {
                project_id:
                (0, 1, 2)  # {project_id: (issue_id, issue_id, ...)}
            }
            results = self.db.get_frequency_series(
                TSDBModel.frequent_issues_by_project, items, dts[0], dts[-1])
            assert has_shape(results, {1: [(1, {1: 1})]})

            results = self.db.get_frequency_totals(
                TSDBModel.frequent_issues_by_project, items, dts[0], dts[-1])
            assert has_shape(results, {1: {1: 1}})

            results = self.db.get_range(TSDBModel.project, [project_id],
                                        dts[0], dts[-1])
            assert has_shape(results, {1: [(1, 1)]})

            results = self.db.get_distinct_counts_series(
                TSDBModel.users_affected_by_project, [project_id], dts[0],
                dts[-1])
            assert has_shape(results, {1: [(1, 1)]})

            results = self.db.get_distinct_counts_totals(
                TSDBModel.users_affected_by_project, [project_id], dts[0],
                dts[-1])
            assert has_shape(results, {1: 1})

            results = self.db.get_distinct_counts_union(
                TSDBModel.users_affected_by_project, [project_id], dts[0],
                dts[-1])
            assert has_shape(results, 1)
Example 33
    def get_group_tag_keys_and_top_values(
        self,
        project_id,
        group_id,
        environment_ids,
        user=None,
        keys=None,
        value_limit=TOP_VALUES_DEFAULT_LIMIT,
        **kwargs
    ):
        # Similar to __get_tag_key_and_top_values except we get the top values
        # for all the keys provided. value_limit in this case means the number
        # of top values for each key, so the total rows returned should be
        # num_keys * limit.

        # First get totals and unique counts by key.
        keys_with_counts = self.get_group_tag_keys(project_id, group_id, environment_ids, keys=keys)

        # Then get the top values with first_seen/last_seen/count for each
        filters = {"project_id": get_project_list(project_id)}
        if environment_ids:
            filters["environment"] = environment_ids
        if keys is not None:
            filters["tags_key"] = keys
        if group_id is not None:
            filters["group_id"] = [group_id]
        conditions = kwargs.get("conditions", [])
        aggregations = kwargs.get("aggregations", [])
        aggregations += [
            ["count()", "", "count"],
            ["min", SEEN_COLUMN, "first_seen"],
            ["max", SEEN_COLUMN, "last_seen"],
        ]

        values_by_key = snuba.query(
            start=kwargs.get("start"),
            end=kwargs.get("end"),
            groupby=["tags_key", "tags_value"],
            conditions=conditions,
            filter_keys=filters,
            aggregations=aggregations,
            orderby="-count",
            limitby=[value_limit, "tags_key"],
            referrer="tagstore.__get_tag_keys_and_top_values",
        )

        # Then supplement the key objects with the top values for each.
        if group_id is None:
            value_ctor = TagValue
        else:
            value_ctor = functools.partial(GroupTagValue, group_id=group_id)

        for keyobj in keys_with_counts:
            key = keyobj.key
            values = values_by_key.get(key, [])
            keyobj.top_values = [
                value_ctor(
                    key=keyobj.key,
                    value=value,
                    times_seen=data["count"],
                    first_seen=parse_datetime(data["first_seen"]),
                    last_seen=parse_datetime(data["last_seen"]),
                )
                for value, data in six.iteritems(values)
            ]

        return keys_with_counts
Example 34
def get_metadata(arxiv):
    """Get metadata about an arxiv publication from website.

    Scrapes the arXiv webpage corresponding to the paper with the `arxiv`
    identifier and return the metadata for the paper in a dictionary.

    Parameters
    ----------
    arxiv : str
        ArXiv identifier.

    Returns
    -------
    metadata : dict
        Dictionary with metadata.

    Notes
    -----
    This function queries arXiv. It must not be used to crawl arXiv.
    It does not look at robots.txt.

    This function currently uses 'abs' HTML pages and not the arXiv API or
    https://arxiv.org/help/oa/index which is the approved way.

    References
    ----------
    - https://arxiv.org
    - https://arxiv.org/help/robots

    Examples
    --------
    >>> metadata = get_metadata('1503.00759')
    >>> metadata['doi'] == '10.1109/JPROC.2015.2483592'
    True

    """
    arxiv = arxiv.strip()
    url = 'https://arxiv.org/abs/' + arxiv
    headers = {'User-agent': USER_AGENT}
    response = requests.get(url, headers=headers)
    tree = etree.HTML(response.content)

    submissions = tree.xpath('//div[@class="submission-history"]/text()')
    datetime_as_string = submissions[-1][5:30]
    isodatetime = parse_datetime(datetime_as_string).isoformat()

    subjects = tree.xpath('//td[@class="tablecell subjects"]/span/text()'
                          '|'
                          '//td[@class="tablecell subjects"]/text()')
    arxiv_classifications = [
        match for subject in subjects
        for match in re.findall(r'\((.*?)\)', subject)
    ]

    metadata = {
        'arxiv': arxiv,
        'authornames': tree.xpath('//div[@class="authors"]/a/text()'),
        'full_text_url': 'https://arxiv.org/pdf/' + arxiv + '.pdf',
        'publication_date': isodatetime[:10],
        'title': re.sub(r'\s+', ' ',
                        tree.xpath('//h1/text()')[-1].strip()),
        'arxiv_classifications': arxiv_classifications,
    }

    # Optional DOI
    doi = tree.xpath('//td[@class="tablecell doi"]/a/text()')
    if not doi:
        doi = tree.xpath('//td[@class="tablecell msc_classes"]/a/text()')
    if doi:
        metadata['doi'] = doi[0]

    return metadata
Example 35
    def _create_ludwig_dataframe(self, mode):
        has_heavy_data = False
        col_map = {}

        if mode == 'train':
            df = self.transaction.input_data.train_df
        elif mode == 'predict':
            df = self.transaction.input_data.data_frame
        elif mode == 'validate':
            df = self.transaction.input_data.validation_df
        elif mode == 'test':
            df = self.transaction.input_data.test_df
        else:
            raise Exception(f'Unknown mode specified: "{mode}"')
        model_definition = {'input_features': [], 'output_features': []}
        data = {}

        if self.transaction.lmd['model_order_by'] is None:
            timeseries_cols = []
        else:
            timeseries_cols = list(
                map(lambda x: x[0], self.transaction.lmd['model_order_by']))

        for col in df.columns:
            tf_col = get_tensorflow_colname(col)
            col_map[tf_col] = col

            # Handle malformed columns
            if col in self.transaction.lmd['columns_to_ignore']:
                continue

            data[tf_col] = []

            col_stats = self.transaction.lmd['column_stats'][col]
            data_subtype = col_stats['data_subtype']

            ludwig_dtype = None
            encoder = None
            cell_type = None
            in_memory = None
            height = None
            width = None

            if col in timeseries_cols:
                encoder = 'rnn'
                cell_type = 'rnn'
                ludwig_dtype = 'order_by_col'

            if data_subtype in DATA_SUBTYPES.ARRAY:
                encoder = 'rnn'
                cell_type = 'rnn'
                ludwig_dtype = 'sequence'

            elif data_subtype in (DATA_SUBTYPES.INT, DATA_SUBTYPES.FLOAT):
                ludwig_dtype = 'numerical'

            elif data_subtype in (DATA_SUBTYPES.BINARY):
                ludwig_dtype = 'category'

            elif data_subtype in (DATA_SUBTYPES.DATE):
                if col not in self.transaction.lmd['predict_columns']:
                    ludwig_dtype = 'date'
                else:
                    ludwig_dtype = 'category'

            elif data_subtype in (DATA_SUBTYPES.TIMESTAMP):
                ludwig_dtype = 'numerical'

            elif data_subtype in (DATA_SUBTYPES.SINGLE,
                                  DATA_SUBTYPES.MULTIPLE):
                ludwig_dtype = 'category'

            elif data_subtype in (DATA_SUBTYPES.IMAGE):
                has_heavy_data = True
                ludwig_dtype = 'image'
                encoder = 'stacked_cnn'
                in_memory = True
                height = 256
                width = 256

            elif data_subtype in (DATA_SUBTYPES.TEXT):
                ludwig_dtype = 'text'

            else:
                # @TODO Maybe regress to some other similar subtype or use the principal data type for certain values
                self.transaction.log.error(
                    f'The Ludwig backend doesn\'t support the "{data_subtype}" data type!'
                )
                estr = f'Data subtype "{data_subtype}" not supported by the Ludwig model backend'
                raise Exception(estr)

            custom_logic_continue = False

            for index, row in df.iterrows():
                if ludwig_dtype == 'order_by_col':
                    ts_data_point = row[col]

                    try:
                        ts_data_point = float(ts_data_point)
                    except (TypeError, ValueError):
                        # Not numeric, so treat it as a datetime string.
                        ts_data_point = parse_datetime(
                            ts_data_point).timestamp()
                    data[tf_col].append(ts_data_point)

                elif ludwig_dtype == 'sequence':
                    arr_str = row[col]
                    if arr_str is not None:
                        arr = list(
                            map(
                                float,
                                arr_str.rstrip(']').lstrip('[').split(
                                    self.transaction.lmd['column_stats'][col]
                                    ['separator'])))
                    else:
                        arr = ''
                    data[tf_col].append(arr)

                # Date isn't supported yet, so we hack around it
                elif ludwig_dtype == 'date':
                    if col in data:
                        data.pop(col)
                        data[tf_col + '_year'] = []
                        data[tf_col + '_month'] = []
                        data[tf_col + '_day'] = []

                        model_definition['input_features'].append({
                            'name':
                            col + '_year',
                            'type':
                            'category'
                        })
                        model_definition['input_features'].append({
                            'name':
                            col + '_month',
                            'type':
                            'category'
                        })
                        model_definition['input_features'].append({
                            'name':
                            col + '_day',
                            'type':
                            'numerical'
                        })

                    date = parse_datetime(row[col])

                    data[tf_col + '_year'].append(date.year)
                    data[tf_col + '_month'].append(date.month)
                    data[tf_col + '_day'].append(date.day)

                    custom_logic_continue = True

                    if col in timeseries_cols:
                        timeseries_cols.remove(col)
                        timeseries_cols.append(col + '_day')
                        timeseries_cols.append(col + '_month')
                        timeseries_cols.append(col + '_year')

                elif data_subtype in (DATA_SUBTYPES.TIMESTAMP):
                    if row[col] is None:
                        unix_ts = 0
                    else:
                        unix_ts = parse_datetime(row[col]).timestamp()

                    data[tf_col].append(unix_ts)

                elif data_subtype in (DATA_SUBTYPES.FLOAT):
                    if type(row[col]) == str:
                        data[tf_col].append(
                            float(str(row[col]).replace(',', '.')))
                    else:
                        data[tf_col].append(row[col])

                elif data_subtype in (DATA_SUBTYPES.INT):
                    if type(row[col]) == str:
                        data[tf_col].append(
                            round(float(str(row[col]).replace(',', '.'))))
                    else:
                        data[tf_col].append(row[col])

                elif data_subtype in (DATA_SUBTYPES.IMAGE):
                    if os.path.isabs(row[col]):
                        data[tf_col].append(row[col])
                    else:
                        data[tf_col].append(os.path.join(
                            os.getcwd(), row[col]))
                else:
                    data[tf_col].append(row[col])

            if custom_logic_continue:
                continue

            if col not in self.transaction.lmd['predict_columns']:
                input_def = {'name': tf_col, 'type': ludwig_dtype}
                if encoder is not None:
                    input_def['encoder'] = encoder
                if cell_type is not None:
                    input_def['cell_type'] = cell_type
                if in_memory is not None:
                    input_def['in_memory'] = in_memory

                if height is not None and width is not None:
                    input_def['height'] = height
                    input_def['width'] = width
                    input_def['resize_image'] = True
                    input_def['resize_method'] = 'crop_or_pad'
                    model_definition['preprocessing'] = {
                        'image': {
                            'height': height,
                            'width': width,
                            'resize_image': True,
                            'resize_method': 'crop_or_pad',
                            'num_channels': 3
                        }
                    }

                model_definition['input_features'].append(input_def)
            else:
                output_def = {'name': tf_col, 'type': ludwig_dtype}
                model_definition['output_features'].append(output_def)

        df = pd.DataFrame(data=data)
        if len(timeseries_cols) > 0:
            # sort_values returns a new DataFrame rather than sorting in place
            df = df.sort_values(timeseries_cols)

        return df, model_definition, timeseries_cols, has_heavy_data, col_map
Example 36
def parse_date(date_str):
    return parse_datetime(date_str) if date_str is not None else None
Example 37
 def deserialize(self, text):
     if text is not None:
         return parse_datetime(text, fuzzy=False)
Example 38
def to_python(obj,
    in_dict,
    str_keys=None,
    date_keys=None,
    int_keys=None,
    float_keys=None,
    object_map=None,
    bool_keys=None,
    dict_keys=None,
    **kwargs):
    """Extends a given object for API Consumption.

    :param obj: Object to extend.
    :param in_dict: Dict to extract data from.
    :param str_keys: List of in_dict keys that will be extracted as strings.
    :param date_keys: List of in_dict keys that will be extracted as datetimes.
    :param object_map: Dict of {key, obj} map, for nested object results.
    """

    d = dict()

    if str_keys:
        for in_key in str_keys:
            d[in_key] = in_dict.get(in_key)

    if date_keys:
        for in_key in date_keys:
            in_date = in_dict.get(in_key)
            try:
                out_date = parse_datetime(in_date)
            except Exception as e:
                #raise e
                out_date = None

            d[in_key] = out_date

    if int_keys:
        for in_key in int_keys:
            if (in_dict is not None) and (in_dict.get(in_key) is not None):
                d[in_key] = int(in_dict.get(in_key))

    if float_keys:
        for in_key in float_keys:
            if (in_dict is not None) and (in_dict.get(in_key) is not None):
                d[in_key] = float(in_dict.get(in_key))

    if bool_keys:
        for in_key in bool_keys:
            if in_dict.get(in_key) is not None:
                d[in_key] = bool(in_dict.get(in_key))

    if dict_keys:
        for in_key in dict_keys:
            if in_dict.get(in_key) is not None:
                d[in_key] = dict(in_dict.get(in_key))

    if object_map:
        for (k, v) in object_map.items():
            if in_dict.get(k):
                d[k] = v.new_from_dict(in_dict.get(k))

    obj.__dict__.update(d)
    obj.__dict__.update(kwargs)

    # Save the dictionary, for write comparisons.
    # obj._cache = d
    # obj.__cache = in_dict

    return obj
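
For context, a hypothetical call (the `Repo` class and the field names below are made up for illustration, and assume the `to_python` above plus its `parse_datetime` import are in scope):

class Repo(object):
    """Bare container whose attributes to_python() fills in."""
    pass


payload = {
    "name": "dateutil",
    "stars": "42",
    "created_at": "2016-01-24T23:15:22+0000",
    "private": False,
}

repo = to_python(
    obj=Repo(),
    in_dict=payload,
    str_keys=["name"],
    int_keys=["stars"],
    date_keys=["created_at"],
    bool_keys=["private"],
)

print(repo.name, repo.stars, repo.created_at.year, repo.private)
# dateutil 42 2016 False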
Example 39
def parse_snuba_datetime(value):
    """Parses a datetime value from snuba."""
    return parse_datetime(value)
Example 40
            self.relationship_id = self.relationship_id
            self.relationship['cwe_id'] = self.relationship_id


# make parser
parser = make_parser()
ch = CWEHandler()
parser.setContentHandler(ch)
# check modification date
try:
    (f, r) = Configuration.getFeedData('cwe')
except Exception as e:
    print(e)
    sys.exit("Cannot open url %s. Bad URL or not connected to the internet?" %
             (Configuration.getFeedURL("cwe")))
lastmodified = parse_datetime(r.headers['last-modified'], ignoretz=True)
i = db.getLastModified('cwe')
if i is not None and not args.f:
    if lastmodified == i:
        print("Not modified")
        sys.exit(0)

# parse xml and store in database
parser.parse(f)
cweList = []

for cwe in progressbar(ch.cwe):
    cwe['Description'] = cwe['Description'].replace("\t\t\t\t\t", " ")
    if args.v:
        print(cwe)
    cweList.append(cwe)
Esempio n. 41
0
def get_point_values(d,
                     date_trunc=DEFAULT_RES,
                     value_func='avg',
                     trange=DEFAULT_RANGE,
                     ts_as_datetime=False):
    conn = get_crate_connection()
    cursor = conn.cursor()
    # validate the date_trunc
    if date_trunc not in ['day', 'hour', 'minute', 'second']:
        date_trunc = DEFAULT_RES
    # use different queries for Number type sensors
    is_number = 'Number' == d.kind
    is_bool = 'Bool' == d.kind
    if not trange:
        trange = DEFAULT_RANGE
    # convert the range
    end = datetime.utcnow()
    start = datetime.utcnow()
    if 'today' == trange:
        start -= timedelta(days=1)
    elif 'yesterday' == trange:
        start -= timedelta(days=2)
        end -= timedelta(days=1)
    elif len(trange) > 7 and ' months' == trange[-7:]:
        start -= timedelta(days=30 * int(trange[:-7]))
    elif len(trange) > 6 and ' month' == trange[-6:]:
        start -= timedelta(days=30 * int(trange[:-6]))
    elif len(trange) > 5 and ' days' == trange[-5:]:
        start -= timedelta(days=int(trange[:-5]))
    elif len(trange) > 1 and 'h' == trange[-1:]:
        start -= timedelta(hours=int(trange[:-1]))
    elif len(trange) > 1 and 'm' == trange[-1:]:
        start -= timedelta(minutes=int(trange[:-1]))
    else:
        # can be given as <date> or <date>,<date>
        dr = trange.split(',')
        start = parse_datetime(dr[0])
        if len(dr) > 1:
            end = parse_datetime(dr[1])

    logger.info("Getting data points for range %s -- %s", start, end)

    if is_number:
        sql = """SELECT DATE_TRUNC('{}', ts) as timest, {}(double_value) FROM "volttron"."data"
                 WHERE topic = ? AND ts > ? AND ts <= ?
                 GROUP BY timest ORDER BY timest DESC;""".format(
            date_trunc, value_func)
    elif is_bool:
        # use MIN as function since we query string_value
        sql = """SELECT DATE_TRUNC('{}', ts) as timest, MIN(string_value), {}(double_value) FROM "volttron"."data"
                 WHERE topic = ? AND ts > ? AND ts <= ?
                 GROUP BY timest ORDER BY timest DESC;""".format(
            date_trunc, value_func)
    else:
        sql = """SELECT ts, string_value FROM "volttron"."data"
                 WHERE topic = ? AND ts > ? AND ts <= ? ORDER BY ts DESC;"""
    cursor.execute(sql, (
        d.topic,
        start,
        end,
    ))
    data = []
    while True:
        result = cursor.fetchone()
        if result is None:
            break
        ts = result[0]
        value = result[1]
        if is_bool:
            if value and (value == 't' or value != '0'):
                value = 1
            else:
                value = round(result[2])
        if ts_as_datetime:
            # convert from epoch to datetime directly
            ts = datetime.utcfromtimestamp(
                ts // 1000).replace(microsecond=0).replace(tzinfo=timezone.utc)
        data.append([ts, value])
    logger.info("Got %s data points for %s", len(data), d.entity_id)
    cursor.close()
    conn.close()
    return list(reversed(data))
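For clarity, the trange strings accepted above translate to start/end windows as in this standalone sketch of the same logic (assuming dateutil's parser is imported as parse_datetime):

from datetime import datetime, timedelta
from dateutil.parser import parse as parse_datetime

def parse_trange(trange):
    """Mirror of the range handling in get_point_values above."""
    end = datetime.utcnow()
    start = datetime.utcnow()
    if trange == 'today':
        start -= timedelta(days=1)
    elif trange == 'yesterday':
        start -= timedelta(days=2)
        end -= timedelta(days=1)
    elif trange.endswith(' months'):
        start -= timedelta(days=30 * int(trange[:-7]))
    elif trange.endswith(' month'):
        start -= timedelta(days=30 * int(trange[:-6]))
    elif trange.endswith(' days'):
        start -= timedelta(days=int(trange[:-5]))
    elif trange.endswith('h'):
        start -= timedelta(hours=int(trange[:-1]))
    elif trange.endswith('m'):
        start -= timedelta(minutes=int(trange[:-1]))
    else:
        # "<date>" or "<date>,<date>"
        dr = trange.split(',')
        start = parse_datetime(dr[0])
        if len(dr) > 1:
            end = parse_datetime(dr[1])
    return start, end

# e.g. parse_trange('7 days'), parse_trange('12h'), parse_trange('2021-01-01,2021-02-01')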
Esempio n. 42
0
def filter_logic(f, limit, skip):
    query = []
    # retrieving lists
    if f['blacklistSelect'] == "on":
        regexes = db.getRules('blacklist')
        if len(regexes) != 0:
            exp = "^(?!" + "|".join(regexes) + ")"
            query.append({
                '$or': [{
                    'vulnerable_configuration': re.compile(exp)
                }, {
                    'vulnerable_configuration': {
                        '$exists': False
                    }
                }, {
                    'vulnerable_configuration': []
                }]
            })
    if f['whitelistSelect'] == "hide":
        regexes = db.getRules('whitelist')
        if len(regexes) != 0:
            exp = "^(?!" + "|".join(regexes) + ")"
            query.append({
                '$or': [{
                    'vulnerable_configuration': re.compile(exp)
                }, {
                    'vulnerable_configuration': {
                        '$exists': False
                    }
                }, {
                    'vulnerable_configuration': []
                }]
            })
    if f['unlistedSelect'] == "hide":
        wlregexes = compile(db.getRules('whitelist'))
        blregexes = compile(db.getRules('blacklist'))
        query.append({
            '$or': [{
                'vulnerable_configuration': {
                    '$in': wlregexes
                }
            }, {
                'vulnerable_configuration': {
                    '$in': blregexes
                }
            }]
        })
    if f['rejectedSelect'] == "hide":
        exp = "^(?!\*\* REJECT \*\*\s+DO NOT USE THIS CANDIDATE NUMBER.*)"
        query.append({'summary': re.compile(exp)})

    # plugin filters
    query.extend(plugManager.doFilter(f, **pluginArgs()))

    # cvss logic
    if f['cvssSelect'] == "above":
        query.append({'cvss': {'$gt': float(f['cvss'])}})
    elif f['cvssSelect'] == "equals":
        query.append({'cvss': float(f['cvss'])})
    elif f['cvssSelect'] == "below":
        query.append({'cvss': {'$lt': float(f['cvss'])}})

    # date logic
    if f['timeSelect'] != "all":
        startDate = parse_datetime(f['startDate'],
                                   ignoretz=True,
                                   dayfirst=True)
        endDate = parse_datetime(f['endDate'], ignoretz=True, dayfirst=True)
        if f['timeSelect'] == "from":
            query.append({f['timeTypeSelect']: {'$gt': startDate}})
        if f['timeSelect'] == "until":
            query.append({f['timeTypeSelect']: {'$lt': endDate}})
        if f['timeSelect'] == "between":
            query.append(
                {f['timeTypeSelect']: {
                     '$gt': startDate,
                     '$lt': endDate
                 }})
        if f['timeSelect'] == "outside":
            query.append({
                '$or': [{
                    f['timeTypeSelect']: {
                        '$lt': startDate
                    }
                }, {
                    f['timeTypeSelect']: {
                        '$gt': endDate
                    }
                }]
            })
    cve = db.getCVEs(limit=limit, skip=skip, query=query)
    # marking relevant records
    if f['whitelistSelect'] == "on": cve = whitelist_mark(cve)
    if f['blacklistSelect'] == "mark": cve = blacklist_mark(cve)
    plugManager.mark(cve, **pluginArgs())
    cve = list(cve)
    return cve
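As an illustration of the cvss and date branches above, a hypothetical filter with cvssSelect="above", cvss="7" and timeSelect="between" on the Published field would append clauses of this shape to query (dates parsed with dayfirst=True):

from datetime import datetime

query = [
    {'cvss': {'$gt': 7.0}},                        # cvssSelect == "above", cvss == "7"
    {'Published': {                                # timeSelect == "between"
        '$gt': datetime(2021, 1, 1),               # startDate "01/01/2021"
        '$lt': datetime(2021, 6, 1),               # endDate "01/06/2021" (dayfirst)
    }},
]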
Esempio n. 43
0
    def after_create(query, dataset, request):
        """
        Hook to execute after the dataset for this source has been created

        In this case, it is used to save the uploaded file to the dataset's
        result path, and finalise the dataset metadata.

        :param dict query:  Sanitised query parameters
        :param DataSet dataset:  Dataset created for this query
        :param request:  Flask request submitted for its creation
        """

        strip_html = query.get("strip_html")

        file = request.files["option-data_upload"]

        file.seek(0)

        # detect encoding - UTF-8 with or without BOM
        encoding = sniff_encoding(file)

        wrapped_file = io.TextIOWrapper(file, encoding=encoding)
        sample = wrapped_file.read(1024 * 1024)
        wrapped_file.seek(0)
        dialect = csv.Sniffer().sniff(sample, delimiters=(",", ";", "\t"))

        # With validated csvs, save as is but make sure the raw file is sorted
        reader = csv.DictReader(wrapped_file, dialect=dialect)
        with dataset.get_results_path().open("w", encoding="utf-8",
                                             newline="") as output_csv:
            # Sort by timestamp
            # note that this relies on the timestamp format to be sortable
            # but the alternative - first converting timestamps and then
            # sorting - would be quite intensive
            dataset.update_status("Sorting file by date")
            sorted_reader = sorted(
                reader,
                key=lambda row: row["timestamp"]
                if isinstance(row["timestamp"], str) else "")

            dataset.update_status("Writing to file")
            fieldnames = list(reader.fieldnames)
            if "unix_timestamp" not in fieldnames:
                fieldnames.append("unix_timestamp")

            writer = csv.DictWriter(output_csv, fieldnames=fieldnames)
            writer.writeheader()
            for row in sorted_reader:
                try:
                    sanitised_time = parse_datetime(row["timestamp"])
                    row["timestamp"] = sanitised_time.strftime(
                        "%Y-%m-%d %H:%I:%S")
                    row["unix_timestamp"] = sanitised_time.timestamp()
                except (TypeError, ValueError):
                    # bad format, skip
                    continue

                if strip_html:
                    row["body"] = strip_tags(row["body"])
                writer.writerow(row)

        file.close()

        with dataset.get_results_path().open(encoding="utf-8") as input:
            if file.filename.endswith(".tab"):
                reader = csv.DictReader(input,
                                        delimiter="\t",
                                        quoting=csv.QUOTE_NONE)
            else:
                reader = csv.DictReader(input)

            dataset.finish(sum(1 for line in reader))
            dataset.update_status("Result processed")

        dataset.update_version(get_software_version())
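The per-row timestamp sanitisation above boils down to the following (a small sketch assuming dateutil's parser as parse_datetime):

from dateutil.parser import parse as parse_datetime

row = {"timestamp": "2021-06-01T12:34:56+00:00", "body": "<p>hello</p>"}
sanitised_time = parse_datetime(row["timestamp"])
row["timestamp"] = sanitised_time.strftime("%Y-%m-%d %H:%M:%S")  # "2021-06-01 12:34:56"
row["unix_timestamp"] = sanitised_time.timestamp()               # 1622550896.0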
Esempio n. 44
0
def to_python(
        obj,
        in_dict,
        str_keys=None,
        date_keys=None,
        int_keys=None,
        real_keys=None,
        object_map=None,
        array_map=None,
        bool_keys=None,
        dict_keys=None,
        enums=None,
        **kwargs):
    """Extends a given object for API Consumption.
    :param obj: Object to extend.
    :param in_dict: Dict to extract data from.
    :param str_keys: List of in_dict keys that will be extracted as strings.
    :param date_keys: List of in_dict keys that will be extracted as datetimes.
    :param int_keys: List of in_dict keys that will be extracted as ints.
    :param real_keys: List of in_dict keys that will be extracted as floats.
    :param object_map: Dict of {key, obj} map, for nested object results.
    :param array_map: Dict of {key, obj} map, for nested lists of objects.
    :param bool_keys: List of in_dict keys that will be extracted as bools.
    :param dict_keys: List of in_dict keys that will be extracted as dicts.
    :param enums: Dict of {key, `Enum`} map, that will be extracted as `Enum`

    """

    d = dict()

    if str_keys:
        for in_key in str_keys:
            private_name = '_' + in_key
            d[private_name] = in_dict.get(in_key)

    if date_keys:
        for in_key in date_keys:
            private_name = '_' + in_key
            in_date = in_dict.get(in_key)
            if in_date is not None:
                try:
                    out_date = parse_datetime(in_date)
                except Exception:
                    # unparseable date: fall back to None rather than raising
                    out_date = None
                d[private_name] = out_date
            else:
                d[private_name] = None

    if int_keys:
        for in_key in int_keys:
            private_name = '_' + in_key
            in_int = in_dict.get(in_key)
            if in_int is not None and in_int != '':
                try:
                    out_int = int(in_int)
                except ValueError as e:
                    raise ZangException(e)
                d[private_name] = out_int
            else:
                d[private_name] = None

    if real_keys:
        for in_key in real_keys:
            if (in_dict is not None) and (in_dict.get(in_key) is not None):
                private_name = '_' + in_key
                d[private_name] = float(in_dict.get(in_key))

    if bool_keys:
        for in_key in bool_keys:
            if in_dict.get(in_key) is not None:
                private_name = '_' + in_key
                value = in_dict.get(in_key)
                if isinstance(value, str_classes):
                    value = value.lower() == 'true'
                d[private_name] = value

    # ENUMS

    if enums:
        for in_key in enums:
            class_ = enums[in_key]
            if (in_dict is not None) and (in_dict.get(in_key) is not None):
                private_name = '_' + in_key
                dictValue = in_dict.get(in_key)
                # code below smells, need to clean it up
                try:
                    value = class_(dictValue)
                except Exception as e:
                    try:
                        value = class_(dictValue.title())
                    except Exception as e:
                        value = class_(dictValue.capitalize())
                d[private_name] = value

    # LISTS

    if dict_keys:
        for in_key in dict_keys:
            if in_dict.get(in_key) is not None:
                private_name = '_' + in_key
                d[private_name] = dict(in_dict.get(in_key))

    if object_map:
        for (k, v) in object_map.items():
            if in_dict.get(k):
                private_name = '_' + k
                d[private_name] = v.new_from_dict(in_dict.get(k))

    if array_map:
        for (k, v) in array_map.items():
            if in_dict.get(k):
                private_name = '_' + k
                d[private_name] = [v.new_from_dict(i) for i in in_dict.get(k)]

    obj.__dict__.update(d)
    obj.__dict__.update(kwargs)

    # Save the dictionary, for write comparisons.
    # obj._cache = d
    # obj.__cache = in_dict

    return obj
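A minimal usage sketch for the variant above (hypothetical class and payload; assumes this to_python, str_classes and ZangException are in scope). Note that attributes are stored under a leading underscore:

class Call(object):
    """Hypothetical API resource container."""

payload = {
    'sid': 'CA123',                                    # hypothetical values
    'date_created': 'Fri, 12 Mar 2021 10:00:00 +0000',
    'duration': '61',
    'muted': 'true',
}
call = to_python(Call(), payload,
                 str_keys=['sid'],
                 date_keys=['date_created'],
                 int_keys=['duration'],
                 bool_keys=['muted'])
# call._sid == 'CA123', call._duration == 61, call._muted is True,
# call._date_created is a parsed datetime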
Esempio n. 45
0
    def execute(self,
                ecosystem,
                bucket_name,
                object_key,
                from_date=None,
                to_date=None):
        """Aggregate gathered topics and store them on S3.

        :param ecosystem: ecosystem name for which topics should be gathered
        :param bucket_name: name of the destination bucket to which topics should be stored
        :param object_key: name of the object under which aggregated topics should be stored
        :param from_date: date limitation for task result queries
        :param to_date: date limitation for task result queries
        """
        if from_date is not None:
            from_date = parse_datetime(from_date)
        if to_date is not None:
            to_date = parse_datetime(to_date)

        s3 = StoragePool.get_connected_storage('S3Data')
        postgres = StoragePool.get_connected_storage('PackagePostgres')

        base_query = postgres.session.query(WorkerResult).\
            join(Analysis). \
            join(Version).\
            join(Package).\
            join(Ecosystem).\
            filter(WorkerResult.error.is_(False)).\
            filter(WorkerResult.worker == 'github_details').\
            filter(Ecosystem.name == ecosystem)

        if from_date is not None:
            base_query = base_query.filter(Analysis.started_at > from_date).\
                order_by(desc(WorkerResult.id))

        if to_date is not None:
            base_query = base_query.filter(Analysis.started_at < to_date).\
                order_by(desc(WorkerResult.id))

        start = 0
        topics = []
        while True:
            try:
                results = base_query.slice(start, start + 10).all()
            except SQLAlchemyError:
                postgres.session.rollback()
                raise

            if not results:
                break

            self.log.info("Collecting topics, slice offset is %s", start)
            start += 10

            for entry in results:
                name = entry.package.name
                version = entry.version.identifier

                self.log.debug("Aggregating topics for %s/%s/%s", ecosystem,
                               name, version)

                task_result = entry.task_result
                if not postgres.is_real_task_result(task_result):
                    self.log.debug(
                        "Result was already stored on S3, retrieving from there"
                    )
                    try:
                        task_result = s3.retrieve_task_result(
                            ecosystem, name, version, 'github_details')
                    except Exception:
                        self.log.exception(
                            "Failed to retrieve result 'github_details' from S3 "
                            "for %s/%s/%s", ecosystem, name, version)
                        continue

                topics.append({
                    'topics':
                    task_result.get('details', {}).get('topics'),
                    'name':
                    name,
                    'ecosystem':
                    ecosystem,
                    'version':
                    version
                })

        report = {
            'ecosystem': ecosystem,
            'bucket_name': bucket_name,
            'object_key': object_key,
            'from_date': str(from_date),
            'to_date': str(to_date),
            'result': topics
        }
        self._store_topics(bucket_name, object_key, report)
Esempio n. 46
0
def process_cve_item(item=None):
    if item is None:
        return None
    cve = {}
    cve['id'] = item['cve']['CVE_data_meta']['ID']
    cve['assigner'] = item['cve']['CVE_data_meta']['ASSIGNER']
    cve['Published'] = parse_datetime(item['publishedDate'], ignoretz=True)
    cve['Modified'] = parse_datetime(item['lastModifiedDate'], ignoretz=True)
    for description in item['cve']['description']['description_data']:
        if description['lang'] == 'en':
            if "summary" in cve:
                cve['summary'] += " {}".format(description['value'])
            else:
                cve['summary'] = description['value']
    if 'impact' in item:
        cve['access'] = {}
        cve['impact'] = {}
        if 'baseMetricV2' in item['impact']:
            cve['access']['authentication'] = item['impact']['baseMetricV2'][
                'cvssV2']['authentication']
            cve['access']['complexity'] = item['impact']['baseMetricV2'][
                'cvssV2']['accessComplexity']
            cve['access']['vector'] = item['impact']['baseMetricV2']['cvssV2'][
                'accessVector']
            cve['impact']['availability'] = item['impact']['baseMetricV2'][
                'cvssV2']['availabilityImpact']
            cve['impact']['confidentiality'] = item['impact']['baseMetricV2'][
                'cvssV2']['confidentialityImpact']
            cve['impact']['integrity'] = item['impact']['baseMetricV2'][
                'cvssV2']['integrityImpact']
            cve['cvss'] = float(
                item['impact']['baseMetricV2']['cvssV2']['baseScore'])
            cve['cvss-time'] = parse_datetime(
                item['lastModifiedDate'], ignoretz=True
            )  # NVD JSON lacks the CVSS time which was present in the original XML format
            cve['cvss-vector'] = item['impact']['baseMetricV2']['cvssV2'][
                'vectorString']
        else:
            cve['cvss'] = float(5)
    if 'references' in item['cve']:
        cve['references'] = []
        for ref in item['cve']['references']['reference_data']:
            cve['references'].append(ref['url'])
    if 'configurations' in item:
        cve['vulnerable_configuration'] = []
        cve['vulnerable_product'] = []
        for cpe in item['configurations']['nodes']:
            if 'cpe_match' in cpe:
                for cpeuri in cpe['cpe_match']:
                    if cpeuri['vulnerable']:
                        query, version_info = get_cpe_info(cpeuri)
                        if query != {}:
                            query["id"] = hashlib.sha1(
                                cpeuri["cpe23Uri"].encode("utf-8") +
                                version_info.encode("utf-8")).hexdigest()
                            cpe_info = db.getCPEVersionInformation(query)
                            if cpe_info:
                                if cpe_info["cpe_name"]:
                                    for vulnerable_version in cpe_info[
                                            "cpe_name"]:
                                        cve = add_if_missing(
                                            cve, "vulnerable_product",
                                            vulnerable_version["cpe23Uri"])
                                        cve = add_if_missing(
                                            cve, "vulnerable_configuration",
                                            vulnerable_version["cpe23Uri"])
                                else:
                                    cve = add_if_missing(
                                        cve, "vulnerable_product",
                                        cpeuri["cpe23Uri"])
                                    cve = add_if_missing(
                                        cve, "vulnerable_configuration",
                                        cpeuri["cpe23Uri"])
                        else:
                            # If the cpe_match did not have any of the version start/end modifiers,
                            # add the CPE string as it is.
                            cve = add_if_missing(cve, "vulnerable_product",
                                                 cpeuri["cpe23Uri"])
                            cve = add_if_missing(cve,
                                                 "vulnerable_configuration",
                                                 cpeuri["cpe23Uri"])
                    else:
                        cve = add_if_missing(cve, "vulnerable_configuration",
                                             cpeuri["cpe23Uri"])
            if 'children' in cpe:
                for child in cpe['children']:
                    if 'cpe_match' in child:
                        for cpeuri in child['cpe_match']:
                            if cpeuri['vulnerable']:
                                query, version_info = get_cpe_info(cpeuri)
                                if query != {}:
                                    query["id"] = hashlib.sha1(
                                        cpeuri["cpe23Uri"].encode("utf-8") +
                                        version_info.encode("utf-8")
                                    ).hexdigest()
                                    cpe_info = db.getCPEVersionInformation(
                                        query)
                                    if cpe_info:
                                        if cpe_info["cpe_name"]:
                                            for vulnerable_version in cpe_info[
                                                    "cpe_name"]:
                                                cve = add_if_missing(
                                                    cve, "vulnerable_product",
                                                    vulnerable_version[
                                                        "cpe23Uri"])
                                                cve = add_if_missing(
                                                    cve,
                                                    "vulnerable_configuration",
                                                    vulnerable_version[
                                                        "cpe23Uri"])
                                        else:
                                            cve = add_if_missing(
                                                cve, "vulnerable_product",
                                                cpeuri["cpe23Uri"])
                                            cve = add_if_missing(
                                                cve,
                                                "vulnerable_configuration",
                                                cpeuri["cpe23Uri"])
                                else:
                                    # If the cpe_match did not have any of the version start/end modifiers,
                                    # add the CPE string as it is.
                                    cve = add_if_missing(
                                        cve, "vulnerable_product",
                                        cpeuri["cpe23Uri"])
                                    cve = add_if_missing(
                                        cve, "vulnerable_configuration",
                                        cpeuri["cpe23Uri"])
                            else:
                                cve = add_if_missing(
                                    cve, "vulnerable_configuration",
                                    cpeuri["cpe23Uri"])
    if 'problemtype' in item['cve']:
        for problem in item['cve']['problemtype']['problemtype_data']:
            for cwe in problem[
                    'description']:  # NVD JSON is unclear whether a CVE can carry more than one CWE; for now the last one wins (NVD-CWE-Other? list?)
                if cwe['lang'] == 'en':
                    cve['cwe'] = cwe['value']
        if not ('cwe' in cve):
            cve['cwe'] = defaultvalue['cwe']
    else:
        cve['cwe'] = defaultvalue['cwe']
    cve['vulnerable_configuration_cpe_2_2'] = []
    return cve
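For reference, a trimmed sketch (hypothetical values) of the NVD JSON item shape the function above consumes; passing it through process_cve_item would yield a flat dict with keys such as 'id', 'Published', 'Modified', 'summary', 'cvss', 'cvss-vector', 'references' and 'cwe':

item = {
    "publishedDate": "2021-04-01T10:15Z",
    "lastModifiedDate": "2021-04-02T08:00Z",
    "cve": {
        "CVE_data_meta": {"ID": "CVE-2021-0001", "ASSIGNER": "cve@mitre.org"},
        "description": {"description_data": [
            {"lang": "en", "value": "Example vulnerability description."}]},
        "references": {"reference_data": [{"url": "https://example.org/advisory"}]},
        "problemtype": {"problemtype_data": [
            {"description": [{"lang": "en", "value": "CWE-79"}]}]},
    },
    "impact": {"baseMetricV2": {"cvssV2": {
        "authentication": "NONE",
        "accessComplexity": "LOW",
        "accessVector": "NETWORK",
        "availabilityImpact": "PARTIAL",
        "confidentialityImpact": "PARTIAL",
        "integrityImpact": "PARTIAL",
        "baseScore": 7.5,
        "vectorString": "AV:N/AC:L/Au:N/C:P/I:P/A:P",
    }}},
    "configurations": {"nodes": []},
}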
Esempio n. 47
0
    def process_cve_item(self, item=None):
        if item is None:
            return None
        if "ASSIGNER" not in item["cve"]["CVE_data_meta"]:
            item["cve"]["CVE_data_meta"]["ASSIGNER"] = None

        cve = {
            "id":
            item["cve"]["CVE_data_meta"]["ID"],
            "assigner":
            item["cve"]["CVE_data_meta"]["ASSIGNER"],
            "Published":
            parse_datetime(item["publishedDate"], ignoretz=True),
            "Modified":
            parse_datetime(item["lastModifiedDate"], ignoretz=True),
            "last-modified":
            parse_datetime(item["lastModifiedDate"], ignoretz=True),
        }

        for description in item["cve"]["description"]["description_data"]:
            if description["lang"] == "en":
                if "summary" in cve:
                    cve["summary"] += " {}".format(description["value"])
                else:
                    cve["summary"] = description["value"]
        if "impact" in item:
            cve["access"] = {}
            cve["impact"] = {}
            if "baseMetricV3" in item["impact"]:
                cve["impact3"] = {}
                cve["exploitability3"] = {}
                cve["impact3"]["availability"] = item["impact"][
                    "baseMetricV3"]["cvssV3"]["availabilityImpact"]
                cve["impact3"]["confidentiality"] = item["impact"][
                    "baseMetricV3"]["cvssV3"]["confidentialityImpact"]
                cve["impact3"]["integrity"] = item["impact"]["baseMetricV3"][
                    "cvssV3"]["integrityImpact"]
                cve["exploitability3"]["attackvector"] = item["impact"][
                    "baseMetricV3"]["cvssV3"]["attackVector"]
                cve["exploitability3"]["attackcomplexity"] = item["impact"][
                    "baseMetricV3"]["cvssV3"]["attackComplexity"]
                cve["exploitability3"]["privilegesrequired"] = item["impact"][
                    "baseMetricV3"]["cvssV3"]["privilegesRequired"]
                cve["exploitability3"]["userinteraction"] = item["impact"][
                    "baseMetricV3"]["cvssV3"]["userInteraction"]
                cve["exploitability3"]["scope"] = item["impact"][
                    "baseMetricV3"]["cvssV3"]["scope"]
                cve["cvss3"] = float(
                    item["impact"]["baseMetricV3"]["cvssV3"]["baseScore"])
                cve["cvss3-vector"] = item["impact"]["baseMetricV3"]["cvssV3"][
                    "vectorString"]
                cve["impactScore3"] = float(
                    item["impact"]["baseMetricV3"]["impactScore"])
                cve["exploitabilityScore3"] = float(
                    item["impact"]["baseMetricV3"]["exploitabilityScore"])
            else:
                cve["cvss3"] = None
            if "baseMetricV2" in item["impact"]:
                cve["access"]["authentication"] = item["impact"][
                    "baseMetricV2"]["cvssV2"]["authentication"]
                cve["access"]["complexity"] = item["impact"]["baseMetricV2"][
                    "cvssV2"]["accessComplexity"]
                cve["access"]["vector"] = item["impact"]["baseMetricV2"][
                    "cvssV2"]["accessVector"]
                cve["impact"]["availability"] = item["impact"]["baseMetricV2"][
                    "cvssV2"]["availabilityImpact"]
                cve["impact"]["confidentiality"] = item["impact"][
                    "baseMetricV2"]["cvssV2"]["confidentialityImpact"]
                cve["impact"]["integrity"] = item["impact"]["baseMetricV2"][
                    "cvssV2"]["integrityImpact"]
                cve["cvss"] = float(
                    item["impact"]["baseMetricV2"]["cvssV2"]["baseScore"])
                cve["exploitabilityScore"] = float(
                    item["impact"]["baseMetricV2"]["exploitabilityScore"])
                cve["impactScore"] = float(
                    item["impact"]["baseMetricV2"]["impactScore"])
                cve["cvss-time"] = parse_datetime(
                    item["lastModifiedDate"], ignoretz=True
                )  # NVD JSON lacks the CVSS time which was present in the original XML format
                cve["cvss-vector"] = item["impact"]["baseMetricV2"]["cvssV2"][
                    "vectorString"]
            else:
                cve["cvss"] = None
        if "references" in item["cve"]:
            cve["references"] = []
            for ref in item["cve"]["references"]["reference_data"]:
                cve["references"].append(ref["url"])
        if "configurations" in item:
            cve["vulnerable_configuration"] = []
            cve["vulnerable_product"] = []
            cve["vendors"] = []
            cve["products"] = []
            cve["vulnerable_product_stems"] = []
            cve["vulnerable_configuration_stems"] = []
            for cpe in item["configurations"]["nodes"]:
                if "cpe_match" in cpe:
                    for cpeuri in cpe["cpe_match"]:
                        if "cpe23Uri" not in cpeuri:
                            continue
                        if cpeuri["vulnerable"]:
                            query, version_info = self.get_cpe_info(cpeuri)
                            if query != {}:
                                query["id"] = hashlib.sha1(
                                    cpeuri["cpe23Uri"].encode("utf-8") +
                                    version_info.encode("utf-8")).hexdigest()
                                cpe_info = getCPEVersionInformation(query)
                                if cpe_info:
                                    if cpe_info["cpe_name"]:
                                        for vulnerable_version in cpe_info[
                                                "cpe_name"]:
                                            cve = self.add_if_missing(
                                                cve,
                                                "vulnerable_product",
                                                vulnerable_version["cpe23Uri"],
                                            )
                                            cve = self.add_if_missing(
                                                cve,
                                                "vulnerable_configuration",
                                                vulnerable_version["cpe23Uri"],
                                            )
                                            cve = self.add_if_missing(
                                                cve,
                                                "vulnerable_configuration_stems",
                                                self.stem(vulnerable_version[
                                                    "cpe23Uri"]),
                                            )
                                            vendor, product = self.get_vendor_product(
                                                vulnerable_version["cpe23Uri"])
                                            cve = self.add_if_missing(
                                                cve, "vendors", vendor)
                                            cve = self.add_if_missing(
                                                cve, "products", product)
                                            cve = self.add_if_missing(
                                                cve,
                                                "vulnerable_product_stems",
                                                self.stem(vulnerable_version[
                                                    "cpe23Uri"]),
                                            )
                                    else:
                                        cve = self.add_if_missing(
                                            cve,
                                            "vulnerable_product",
                                            cpeuri["cpe23Uri"],
                                        )
                                        cve = self.add_if_missing(
                                            cve,
                                            "vulnerable_configuration",
                                            cpeuri["cpe23Uri"],
                                        )
                                        cve = self.add_if_missing(
                                            cve,
                                            "vulnerable_configuration_stems",
                                            self.stem(cpeuri["cpe23Uri"]),
                                        )
                                        vendor, product = self.get_vendor_product(
                                            cpeuri["cpe23Uri"])
                                        cve = self.add_if_missing(
                                            cve, "vendors", vendor)
                                        cve = self.add_if_missing(
                                            cve, "products", product)
                                        cve = self.add_if_missing(
                                            cve,
                                            "vulnerable_product_stems",
                                            self.stem(cpeuri["cpe23Uri"]),
                                        )
                            else:
                                # If the cpe_match did not have any of the version start/end modifiers,
                                # add the CPE string as it is.
                                cve = self.add_if_missing(
                                    cve, "vulnerable_product",
                                    cpeuri["cpe23Uri"])
                                cve = self.add_if_missing(
                                    cve, "vulnerable_configuration",
                                    cpeuri["cpe23Uri"])
                                cve = self.add_if_missing(
                                    cve,
                                    "vulnerable_configuration_stems",
                                    self.stem(cpeuri["cpe23Uri"]),
                                )
                                vendor, product = self.get_vendor_product(
                                    cpeuri["cpe23Uri"])
                                cve = self.add_if_missing(
                                    cve, "vendors", vendor)
                                cve = self.add_if_missing(
                                    cve, "products", product)
                                cve = self.add_if_missing(
                                    cve,
                                    "vulnerable_product_stems",
                                    self.stem(cpeuri["cpe23Uri"]),
                                )
                        else:
                            cve = self.add_if_missing(
                                cve, "vulnerable_configuration",
                                cpeuri["cpe23Uri"])
                            cve = self.add_if_missing(
                                cve,
                                "vulnerable_configuration_stems",
                                self.stem(cpeuri["cpe23Uri"]),
                            )
                if "children" in cpe:
                    for child in cpe["children"]:
                        if "cpe_match" in child:
                            for cpeuri in child["cpe_match"]:
                                if "cpe23Uri" not in cpeuri:
                                    continue
                                if cpeuri["vulnerable"]:
                                    query, version_info = self.get_cpe_info(
                                        cpeuri)
                                    if query != {}:
                                        query["id"] = hashlib.sha1(
                                            cpeuri["cpe23Uri"].encode("utf-8")
                                            + version_info.encode("utf-8")
                                        ).hexdigest()
                                        cpe_info = getCPEVersionInformation(
                                            query)
                                        if cpe_info:
                                            if cpe_info["cpe_name"]:
                                                for vulnerable_version in cpe_info[
                                                        "cpe_name"]:
                                                    cve = self.add_if_missing(
                                                        cve,
                                                        "vulnerable_product",
                                                        vulnerable_version[
                                                            "cpe23Uri"],
                                                    )
                                                    cve = self.add_if_missing(
                                                        cve,
                                                        "vulnerable_configuration",
                                                        vulnerable_version[
                                                            "cpe23Uri"],
                                                    )
                                                    cve = self.add_if_missing(
                                                        cve,
                                                        "vulnerable_configuration_stems",
                                                        self.stem(
                                                            vulnerable_version[
                                                                "cpe23Uri"]),
                                                    )
                                                    (
                                                        vendor,
                                                        product,
                                                    ) = self.get_vendor_product(
                                                        vulnerable_version[
                                                            "cpe23Uri"])
                                                    cve = self.add_if_missing(
                                                        cve, "vendors", vendor)
                                                    cve = self.add_if_missing(
                                                        cve, "products",
                                                        product)
                                                    cve = self.add_if_missing(
                                                        cve,
                                                        "vulnerable_product_stems",
                                                        self.stem(
                                                            vulnerable_version[
                                                                "cpe23Uri"]),
                                                    )
                                            else:
                                                cve = self.add_if_missing(
                                                    cve,
                                                    "vulnerable_product",
                                                    cpeuri["cpe23Uri"],
                                                )
                                                cve = self.add_if_missing(
                                                    cve,
                                                    "vulnerable_configuration",
                                                    cpeuri["cpe23Uri"],
                                                )
                                                cve = self.add_if_missing(
                                                    cve,
                                                    "vulnerable_configuration_stems",
                                                    self.stem(
                                                        cpeuri["cpe23Uri"]),
                                                )
                                                (
                                                    vendor,
                                                    product,
                                                ) = self.get_vendor_product(
                                                    cpeuri["cpe23Uri"])
                                                cve = self.add_if_missing(
                                                    cve, "vendors", vendor)
                                                cve = self.add_if_missing(
                                                    cve, "products", product)
                                                cve = self.add_if_missing(
                                                    cve,
                                                    "vulnerable_product_stems",
                                                    self.stem(
                                                        cpeuri["cpe23Uri"]),
                                                )
                                    else:
                                        # If the cpe_match did not have any of the version start/end modifiers,
                                        # add the CPE string as it is.
                                        if "cpe23Uri" not in cpeuri:
                                            continue
                                        cve = self.add_if_missing(
                                            cve,
                                            "vulnerable_product",
                                            cpeuri["cpe23Uri"],
                                        )
                                        cve = self.add_if_missing(
                                            cve,
                                            "vulnerable_configuration",
                                            cpeuri["cpe23Uri"],
                                        )
                                        cve = self.add_if_missing(
                                            cve,
                                            "vulnerable_configuration_stems",
                                            self.stem(cpeuri["cpe23Uri"]),
                                        )
                                        vendor, product = self.get_vendor_product(
                                            cpeuri["cpe23Uri"])
                                        cve = self.add_if_missing(
                                            cve, "vendors", vendor)
                                        cve = self.add_if_missing(
                                            cve, "products", product)
                                        cve = self.add_if_missing(
                                            cve,
                                            "vulnerable_product_stems",
                                            self.stem(cpeuri["cpe23Uri"]),
                                        )
                                else:
                                    if "cpe23Uri" not in cpeuri:
                                        continue
                                    cve = self.add_if_missing(
                                        cve,
                                        "vulnerable_configuration",
                                        cpeuri["cpe23Uri"],
                                    )
                                    cve = self.add_if_missing(
                                        cve,
                                        "vulnerable_configuration_stems",
                                        self.stem(cpeuri["cpe23Uri"]),
                                    )
        if "problemtype" in item["cve"]:
            for problem in item["cve"]["problemtype"]["problemtype_data"]:
                for cwe in problem[
                        "description"]:  # NVD JSON is unclear whether a CVE can carry more than one CWE;
                    # for now the last one wins (NVD-CWE-Other? list?)
                    if cwe["lang"] == "en":
                        cve["cwe"] = cwe["value"]
            if not ("cwe" in cve):
                cve["cwe"] = defaultvalue["cwe"]
        else:
            cve["cwe"] = defaultvalue["cwe"]
        cve["vulnerable_configuration_cpe_2_2"] = []
        return cve
Esempio n. 48
0
def update_collections():
    file_prefix = "nvdcve-2.0-"
    file_suffix = ".xml.gz"
    file_mod = "modified"
    file_rec = "recent"
    getfile = file_prefix + file_mod + file_suffix
    try:
        (f,
         r) = Configuration.getFile(Configuration.getFeedURL('cve') + getfile)
    except Exception:
        sys.exit(
            "Cannot open url %s. Bad URL or not connected to the internet?" %
            (Configuration.getFeedURL("cve") + getfile))
    i = cve_db.getInfo("cves")
    last_modified = parse_datetime(r.headers['last-modified'], ignoretz=True)
    if i is not None:
        if last_modified == i['last-modified']:
            logger.info("Not modified")
            return "Not modified"
    cve_db.setColUpdate("cves", last_modified)
    parser = make_parser()
    ch = CVEHandler()
    parser.setContentHandler(ch)
    parser.parse(f)
    for item in ch.cves:
        # check if the CVE already exists.
        x = cve_db.getCVE(item['id'])
        # if so, update the entry.
        if x:
            if 'cvss' not in item:
                item['cvss'] = None
            if 'cwe' not in item:
                item['cwe'] = defaultvalue['cwe']
            cve_db.updateCVE(item)
        else:
            cve_db.insertCVE(item)
    # get the 'recent' file
    getfile = file_prefix + file_rec + file_suffix
    try:
        (f,
         r) = Configuration.getFile(Configuration.getFeedURL('cve') + getfile)
    except Exception:
        sys.exit(
            "Cannot open url %s. Bad URL or not connected to the internet?" %
            (Configuration.getFeedURL("cve") + getfile))
    parser = make_parser()
    ch = CVEHandler()
    parser.setContentHandler(ch)
    parser.parse(f)
    for item in progressbar(ch.cves):
        # check if the CVE already exists.
        x = cve_db.getCVE(item['id'])
        # if so, update the entry.
        if x:
            if args.v:
                logger.info("item found : " + item['id'])
            if 'cvss' not in item:
                item['cvss'] = None
            else:
                item['cvss'] = float(item['cvss'])
            if 'cwe' not in item:
                item['cwe'] = defaultvalue['cwe']
            cve_db.updateCVE(item)
        # if not, create it.
        else:
            cve_db.insertCVE(item)
    return 'success'
Esempio n. 49
0
class TomlTokenizer(object):
    PATTERNS = (
        ('bool', re.compile('(true|false)'), lambda x: x == 'true'),
        ('comment', re.compile(r'(#[\s\S]*)'), lambda x: x[1:].strip()),
        ('id', re.compile(r'([_a-zA-Z][a-zA-Z0-9_]*)'), None),
        ('section',
         re.compile(
             r'(\[[_a-zA-Z][a-zA-Z0-9_]*(\.[_a-zA-Z][a-zA-Z0-9_]*)*\])'),
         lambda x: x[1:-1].strip()),
        ('string', re.compile('("[^"]*")'),
         lambda x: unescape(x[1:-1].strip())),
        ('whitespace', re.compile(r'(\s+)'), lambda x: None),
        ('literal', re.compile(r'([,\[\]=])'), None),
        ('datetime',
         re.compile(
             r'(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}([-+]\d{2}:?\d{2}|Z))'),
         lambda x: parse_datetime(x)),
        ('float', re.compile(r'(\d+\.\d+)'), lambda x: float(x)),
        ('int', re.compile(r'(\d+)'), lambda x: int(x)),
    )
    LOGGER_NAME = 'tomless.tokenizer'

    @classmethod
    def tokenize_line(cls, line, line_no):
        offset = 0
        logger = logging.getLogger(cls.LOGGER_NAME)
        logger.debug('tokenlizing line {} {}'.format(line_no, line))
        line = line.strip()
        while offset < len(line):
            current_offset = offset
            for t_type, pattern, processor in cls.PATTERNS:
                logger.debug('try pattern {} at offset {}'.format(
                    t_type, offset))
                match = pattern.match(line, offset)
                if not match:
                    logger.debug('no match')
                    continue
                content = match.group(0)
                content_length = len(content)
                val = processor(content) if processor else content
                if t_type != 'whitespace':
                    yield TomlToken(val if t_type == 'literal' else t_type,
                                    val, line_no, offset)
                offset += content_length
                logger.debug('matched pattern {} {} ({})'.format(
                    t_type, content, content_length))
            if current_offset == offset:
                raise Exception('lex error at line {} {}: {}'.format(
                    line_no, current_offset, line))
            logger.debug('check eol:{} {} {}'.format(offset, len(line),
                                                     line[offset:]))

    @classmethod
    def tokenize_content(cls, content):
        for i, line in enumerate(content.strip().splitlines()):
            for token in cls.tokenize_line(line, i + 1):
                yield token

    @classmethod
    def tokenize_file(cls, filename):
        content = ''
        with open(filename) as f:
            content = f.read()
        return list(cls.tokenize_content(content))
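A minimal usage sketch (assumes TomlToken and unescape from the same module are available):

content = '''
[server]
host = "localhost"
port = 8080
active = true
'''
for token in TomlTokenizer.tokenize_content(content):
    print(token)
# emits, in order, a token for the section header, then id / '=' / value
# groups typed as string, int and bool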
Esempio n. 50
0
def get_snuba_translators(filter_keys, is_grouprelease=False):
    """
    Some models are stored differently in snuba, eg. as the environment
    name instead of the environment ID. Here we create and return forward()
    and reverse() translation functions that perform all the required changes.

    forward() is designed to work on the filter_keys and so should be called
    with a map of {column: [key1, key2], ...} and should return an updated map
    with the filter keys replaced with the ones that Snuba expects.

    reverse() is designed to work on result rows, so should be called with a row
    in the form {column: value, ...} and will return a translated result row.

    Because translation can potentially rely on combinations of different parts
    of the result row, I decided to implement them as composable functions over the
    row to be translated. This should make it simpler to add any other needed
    translations as long as you can express them as forward(filters) and reverse(row)
    functions.
    """

    # Helper lambdas to compose translator functions
    identity = (lambda x: x)
    compose = (lambda f, g: lambda x: f(g(x)))
    replace = (lambda d, key, val: d.update({key: val}) or d)

    forward = identity
    reverse = identity

    map_columns = {
        'environment': (Environment, 'name', lambda name: None
                        if name == '' else name),
        'tags[sentry:release]': (Release, 'version', identity),
    }

    for col, (model, field, fmt) in six.iteritems(map_columns):
        fwd, rev = None, None
        ids = filter_keys.get(col)
        if not ids:
            continue
        if is_grouprelease and col == "tags[sentry:release]":
            # GroupRelease -> Release translation is a special case because the
            # translation relies on both the Group and Release value in the result row.
            #
            # We create a map of {grouprelease_id: (group_id, version), ...} and the corresponding
            # reverse map of {(group_id, version): grouprelease_id, ...}
            # NB this does depend on `issue` being defined in the query result, and the correct
            # set of issues being resolved, which is outside the control of this function.
            gr_map = GroupRelease.objects.filter(id__in=ids).values_list(
                "id", "group_id", "release_id")
            ver = dict(
                Release.objects.filter(id__in=[x[2]
                                               for x in gr_map]).values_list(
                                                   "id", "version"))
            fwd_map = {
                gr: (group, ver[release])
                for (gr, group, release) in gr_map
            }
            rev_map = dict(reversed(t) for t in six.iteritems(fwd_map))
            fwd = (lambda col, trans: lambda filters: replace(
                filters, col, [trans[k][1] for k in filters[col]]))(col,
                                                                    fwd_map)
            rev = (
                lambda col, trans: lambda row: replace(
                    # The translate map may not have every combination of issue/release
                    # returned by the query.
                    row,
                    col,
                    trans.get((row["issue"], row[col]))))(col, rev_map)

        else:
            fwd_map = {
                k: fmt(v)
                for k, v in model.objects.filter(
                    id__in=ids).values_list("id", field)
            }
            rev_map = dict(reversed(t) for t in six.iteritems(fwd_map))
            fwd = (lambda col, trans: lambda filters: replace(
                filters, col, [trans[k] for k in filters[col] if k]))(col,
                                                                      fwd_map)
            rev = (lambda col, trans: lambda row: replace(
                row, col, trans[row[col]])
                   if col in row else row)(col, rev_map)

        if fwd:
            forward = compose(forward, fwd)
        if rev:
            reverse = compose(reverse, rev)

    # Extra reverse translator for time column.
    reverse = compose(
        reverse,
        lambda row: replace(row, "time",
                            int(to_timestamp(parse_datetime(row["time"]))))
        if "time" in row else row,
    )

    return (forward, reverse)
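A minimal usage sketch (hypothetical IDs; requires the referenced Environment rows to exist in the Django database):

forward, reverse = get_snuba_translators({'environment': [1, 2], 'project_id': [10]})

# forward() rewrites the filter values into what Snuba expects,
# e.g. environment IDs are replaced by environment names:
snuba_filters = forward({'environment': [1, 2], 'project_id': [10]})

# reverse() is applied to each result row, e.g. the "time" column is
# converted back into a unix timestamp:
row = reverse({'time': '2021-06-01T00:00:00+00:00', 'count': 3})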
Esempio n. 51
0
    def construct_package_query(cls, input_json):
        """Construct the query to retrieve detailed information of given package."""
        # TODO: reduce cyclomatic complexity
        # see https://fabric8-analytics.github.io/dashboard/fabric8-analytics-data-model.cc.D.html
        # issue: https://github.com/fabric8-analytics/fabric8-analytics-data-model/issues/232
        pkg_name = input_json.get('package')
        ecosystem = input_json.get('ecosystem')
        pkg_name_tokens = re.split(r'\W+', pkg_name)
        prp_package = ""
        drop_prop = ""
        drop_props = []
        # TODO: refactor into the separate module
        str_package = "pkg = g.V().has('ecosystem','{ecosystem}').has('name', '{pkg_name}')." \
                      "tryNext().orElseGet{{g.V()." \
                      "has('vertex_label','Count').choose(has('{ecosystem}_pkg_count')," \
                      "sack(assign).by('{ecosystem}_pkg_count').sack(sum).by(constant(" \
                      "1)).property('{ecosystem}_pkg_count',sack())," \
                      "property('{ecosystem}_pkg_count',1)).iterate();" \
                      "graph.addVertex('ecosystem', '{ecosystem}', 'name', " \
                      "'{pkg_name}', 'vertex_label', 'Package'); }};" \
                      "pkg.property('last_updated', {last_updated});".format(
                        ecosystem=ecosystem, pkg_name=pkg_name, last_updated=str(time.time()))
        cur_latest_ver, cur_libio_latest_ver = get_current_version(
            ecosystem, pkg_name)
        cur_date = (datetime.utcnow()).strftime('%Y%m%d')
        last_updated_flag = 'false'
        latest_version = cls.sanitize_text_for_query(
            input_json.get('latest_version'))

        if latest_version:
            # If the latest version has no CVE, it also becomes the latest non-CVE version
            non_cve_ver = get_latest_version_non_cve(ecosystem, pkg_name,
                                                     latest_version)
            prp_package += "pkg.property('latest_non_cve_version', '{}');".format(
                non_cve_ver)
            prp_package += "pkg.property('latest_version', '{}');".format(
                latest_version)
            if latest_version != cur_latest_ver:
                prp_package += "pkg.property('latest_version_last_updated', '{}');".format(
                    cur_date)
                last_updated_flag = 'true'

        # Get Github Details
        if 'github_details' in input_json.get('analyses', {}):
            gh_details = input_json.get('analyses').get('github_details').get(
                'details', {})
            gh_prs_last_year_opened = str(
                gh_details.get('updated_pull_requests',
                               {}).get('year', {}).get('opened', -1))
            gh_prs_last_month_opened = str(
                gh_details.get('updated_pull_requests',
                               {}).get('month', {}).get('opened', -1))
            gh_prs_last_year_closed = str(
                gh_details.get('updated_pull_requests',
                               {}).get('year', {}).get('closed', -1))
            gh_prs_last_month_closed = str(
                gh_details.get('updated_pull_requests',
                               {}).get('month', {}).get('closed', -1))
            gh_issues_last_year_opened = str(
                gh_details.get('updated_issues', {}).get('year',
                                                         {}).get('opened', -1))
            gh_issues_last_month_opened = str(
                gh_details.get('updated_issues', {}).get('month',
                                                         {}).get('opened', -1))
            gh_issues_last_year_closed = str(
                gh_details.get('updated_issues', {}).get('year',
                                                         {}).get('closed', -1))
            gh_issues_last_month_closed = str(
                gh_details.get('updated_issues', {}).get('month',
                                                         {}).get('closed', -1))
            gh_forks = str(gh_details.get('forks_count', -1))
            gh_refreshed_on = gh_details.get('updated_on')
            gh_stargazers = str(gh_details.get('stargazers_count', -1))
            gh_open_issues_count = str(gh_details.get('open_issues_count', -1))
            gh_subscribers_count = str(gh_details.get('subscribers_count', -1))
            gh_contributors_count = str(
                gh_details.get('contributors_count', -1))
            topics = gh_details.get('topics', [])

            # TODO: refactor into the separate module
            prp_package += "pkg.property('gh_prs_last_year_opened', {gh_prs_last_year_opened});" \
                           "pkg.property('gh_prs_last_month_opened', {gh_prs_last_month_opened});" \
                           "pkg.property('gh_prs_last_year_closed', {gh_prs_last_year_closed});" \
                           "pkg.property('gh_prs_last_month_closed', {gh_prs_last_month_closed});" \
                           "pkg.property('gh_issues_last_year_opened', " \
                           "{gh_issues_last_year_opened});" \
                           "pkg.property('gh_issues_last_month_opened', " \
                           "{gh_issues_last_month_opened});" \
                           "pkg.property('gh_issues_last_year_closed', " \
                           "{gh_issues_last_year_closed});" \
                           "pkg.property('gh_issues_last_month_closed', " \
                           "{gh_issues_last_month_closed});" \
                           "pkg.property('gh_forks', {gh_forks});" \
                           "pkg.property('gh_refreshed_on', '{gh_refreshed_on}');" \
                           "pkg.property('gh_stargazers', {gh_stargazers});" \
                           "pkg.property('gh_open_issues_count', {gh_open_issues_count});" \
                           "pkg.property('gh_subscribers_count', {gh_subscribers_count});" \
                           "pkg.property('gh_contributors_count', {gh_contributors_count});".format(
                            gh_prs_last_year_opened=gh_prs_last_year_opened,
                            gh_prs_last_month_opened=gh_prs_last_month_opened,
                            gh_prs_last_year_closed=gh_prs_last_year_closed,
                            gh_prs_last_month_closed=gh_prs_last_month_closed,
                            gh_issues_last_year_opened=gh_issues_last_year_opened,
                            gh_issues_last_month_opened=gh_issues_last_month_opened,
                            gh_issues_last_year_closed=gh_issues_last_year_closed,
                            gh_issues_last_month_closed=gh_issues_last_month_closed,
                            gh_forks=gh_forks, gh_stargazers=gh_stargazers,
                            gh_refreshed_on=gh_refreshed_on,
                            gh_open_issues_count=gh_open_issues_count,
                            gh_subscribers_count=gh_subscribers_count,
                            gh_contributors_count=gh_contributors_count)

            # Add github topics
            if topics:
                drop_props.append('topics')
                str_package += " ".join([
                    "pkg.property('topics', '{}');".format(t) for t in topics
                    if t
                ])

        # Add tokens for a package
        if pkg_name_tokens:
            drop_props.append('tokens')
            str_package += " ".join([
                "pkg.property('tokens', '{}');".format(t)
                for t in pkg_name_tokens if t
            ])

        # Get Libraries.io data
        if 'libraries_io' in input_json.get('analyses', {}):
            v2 = input_json['analyses']['libraries_io'].get('schema', {}).get('version', '0-0-0') \
                 >= '2-0-0'
            details = input_json['analyses']['libraries_io'].get('details', {})
            libio_dependents_projects = details.get('dependents',
                                                    {}).get('count', -1)
            libio_dependents_repos = details.get('dependent_repositories',
                                                 {}).get('count', -1)
            releases = details.get('releases', {})
            libio_total_releases = int(releases.get('count', -1))
            libio_latest_version = libio_latest_published_at = ''
            if libio_total_releases > 0:
                if v2:
                    libio_latest = releases.get('recent',
                                                [{}])[-1]  # last is latest
                    libio_latest_published_at = libio_latest.get(
                        'published_at', '')
                    libio_latest_version = libio_latest.get('number', '')
                else:
                    libio_latest_published_at = releases.get('latest', {}).get(
                        'published_at', '')
                    libio_latest_version = releases.get('latest',
                                                        {}).get('version', '')

                if libio_latest_version != cur_libio_latest_ver and last_updated_flag != 'true':
                    prp_package += "pkg.property('latest_version_last_updated', '{}');" \
                        .format(cur_date)

            if libio_latest_published_at:
                t = libio_latest_published_at
                p = parse_datetime(t).timetuple() if t else ''
                published_at = str(time.mktime(p)) if p else ''
                prp_package += "pkg.property('libio_latest_release', '{}');".format(
                    published_at)

            if details.get('dependent_repositories', {}).get('top'):
                drop_props.append('libio_usedby')
                for key, val in details.get('dependent_repositories',
                                            {}).get('top', {}).items():
                    prp_package += "pkg.property('libio_usedby', '{key}:{val}');".format(
                        key=key, val=val)

            prp_package += "pkg.property('libio_dependents_projects', " \
                           "'{libio_dependents_projects}');" \
                           "pkg.property('libio_dependents_repos', '{libio_dependents_repos}');" \
                           "pkg.property('libio_total_releases', '{libio_total_releases}');" \
                           "pkg.property('libio_latest_version', '{libio_latest_version}');".format(
                            libio_dependents_projects=libio_dependents_projects,
                            libio_dependents_repos=libio_dependents_repos,
                            libio_total_releases=libio_total_releases,
                            libio_latest_version=libio_latest_version)

            # Update EPV Github Release Date based on libraries_io data
            if v2:
                # 'recent' is list of {'number':n, 'published_at':p} including the latest
                for release in releases.get('recent', []):
                    rel_published = release.get('published_at', '')
                    parsed_dt = parse_datetime(
                        rel_published).timetuple() if rel_published else ''
                    timestamp = time.mktime(parsed_dt) if parsed_dt else ''

                    prp_package += "g.V().has('pecosystem','{ecosystem}').has('pname'," \
                                   "'{pkg_name}').has('version','{version}')." \
                                   "property('gh_release_date',{gh_rel});".format(
                                    ecosystem=ecosystem, pkg_name=pkg_name,
                                    version=release.get('number', ''),
                                    gh_rel=str(timestamp))
            else:
                if libio_latest_published_at:
                    gh_release = time.mktime(
                        parse_datetime(libio_latest_published_at).timetuple())
                    prp_package += "g.V().has('pecosystem','{ecosystem}').has('pname'," \
                                   "'{pkg_name}')." \
                                   "has('version','{libio_latest_version}')." \
                                   "property('gh_release_date', {gh_rel});".format(
                                    pkg_name=pkg_name, ecosystem=ecosystem,
                                    libio_latest_version=libio_latest_version,
                                    gh_rel=str(gh_release))
                for version, release in releases.get('latest',
                                                     {}).get('recent',
                                                             {}).items():
                    prp_package += "g.V().has('pecosystem','{ecosystem}').has('pname'," \
                                   "'{pkg_name}').has('version','{version}')." \
                                   "property('gh_release_date',{gh_rel});".format(
                                    ecosystem=ecosystem, pkg_name=pkg_name, version=version,
                                    gh_rel=str(time.mktime(parse_datetime(release).timetuple())))

        # Refresh the properties wherever applicable
        if len(drop_props) > 0:
            drop_prop += "g.V().has('ecosystem','{ecosystem}').has('name'," \
                         "'{pkg_name}').properties('{p}').drop().iterate();".format(
                            ecosystem=ecosystem, pkg_name=pkg_name, p="','".join(drop_props))

        return drop_prop + str_package, prp_package
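
A small illustration of the multi-valued property pattern used above for 'topics' and 'tokens': the old values are dropped in a separate traversal (collected in drop_props) and each new value is re-added with its own .property(...) call. The helper below is a hypothetical sketch of just the re-add part, not part of the original code.

def multi_value_property(var, prop, values):
    # One .property(...) statement per non-empty value, mirroring how the
    # query above re-adds 'topics' and 'tokens' after dropping them.
    return " ".join(
        "{var}.property('{prop}', '{val}');".format(var=var, prop=prop, val=v)
        for v in values if v
    )

print(multi_value_property("pkg", "tokens", ["requests", "", "http"]))
# pkg.property('tokens', 'requests'); pkg.property('tokens', 'http');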
Esempio n. 52
0
def from_iso8601(value):
    return parse_datetime(value)
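
Assuming parse_datetime is dateutil.parser.parse, which matches how it is used throughout these examples, the ISO-8601 helper behaves like this:

from dateutil.parser import parse as parse_datetime  # assumed binding

def from_iso8601(value):
    return parse_datetime(value)

dt = from_iso8601("2021-03-01T12:30:00+00:00")
print(dt.isoformat())  # 2021-03-01T12:30:00+00:00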
Esempio n. 53
0
    def execute(self, ecosystem, bucket_name, object_key, from_date=None, to_date=None):
        """Aggregate gathered topics and store them on S3.
        
        :param ecosystem: ecosystem name for which topics should be gathered
        :param bucket_name: name of the destination bucket to which topics should be stored
        :param object_key: name of the object under which aggregated topics should be stored
        :param from_date: date limitation for task result queries
        :param to_date: date limitation for task result queries
        """
        if from_date is not None:
            from_date = parse_datetime(from_date)
        if to_date is not None:
            to_date = parse_datetime(to_date)

        s3 = StoragePool.get_connected_storage('S3Data')
        # TODO: this will need to be changed once we will introduce package level flows
        postgres = StoragePool.get_connected_storage('BayesianPostgres')

        base_query = postgres.session.query(WorkerResult).\
            join(Analysis).\
            filter(WorkerResult.error.is_(False)).\
            filter(WorkerResult.worker == 'github_details')

        if from_date is not None:
            base_query = base_query.filter(Analysis.started_at > from_date).\
                order_by(desc(WorkerResult.id))

        if to_date is not None:
            base_query = base_query.filter(Analysis.started_at < to_date).\
                order_by(desc(WorkerResult.id))

        start = 0
        topics = []
        while True:
            results = base_query.slice(start, start + 10).all()

            if not results:
                break

            self.log.info("Collecting topics, slice offset is %s", start)
            start += 10

            for entry in results:
                name = entry.package.name
                version = entry.package.version

                task_result = entry.task_result
                if not postgres.is_real_task_result(task_result):
                    task_result = s3.retrieve_task_result(ecosystem, name, version, 'github_details')

                topics.append({
                    'topics': task_result.get('details', {}).get('topics'),
                    'name': name,
                    'ecosystem': ecosystem,
                    'version': version
                })

        report = {
            'ecosystem': ecosystem,
            'bucket_name': bucket_name,
            'object_key': object_key,
            'from_date': str(from_date),
            'to_date': str(to_date),
            'result': topics
        }
        self._store_topics(bucket_name, object_key, report)
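
The query above is paged with .slice(start, start + 10) until an empty batch comes back. A standalone sketch of that pattern, with a fake query object so it runs without SQLAlchemy or a database:

def iter_slices(query, batch_size=10):
    """Yield results in fixed-size slices until an empty slice is returned.

    `query` only needs a .slice(start, stop) returning something with .all(),
    which is what the SQLAlchemy query in the example above provides.
    """
    start = 0
    while True:
        results = query.slice(start, start + batch_size).all()
        if not results:
            break
        yield results
        start += batch_size

class FakeQuery:
    # Stand-in for an SQLAlchemy query so the sketch runs without a database.
    def __init__(self, rows):
        self._rows = rows

    def slice(self, start, stop):
        rows = self._rows[start:stop]
        return type("Result", (), {"all": staticmethod(lambda: rows)})()

for batch in iter_slices(FakeQuery(list(range(25))), batch_size=10):
    print(len(batch))  # 10, 10, 5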
Esempio n. 54
0
    def __get_tag_key_and_top_values(
        self, project_id, group_id, environment_id, key, limit=3, raise_on_empty=True, **kwargs
    ):

        tag = u"tags[{}]".format(key)
        filters = {"project_id": get_project_list(project_id)}
        if environment_id:
            filters["environment"] = [environment_id]
        if group_id is not None:
            filters["group_id"] = [group_id]
        conditions = kwargs.get("conditions", [])
        aggregations = kwargs.get("aggregations", [])

        conditions.append([tag, "!=", ""])
        aggregations += [
            ["uniq", tag, "values_seen"],
            ["count()", "", "count"],
            ["min", SEEN_COLUMN, "first_seen"],
            ["max", SEEN_COLUMN, "last_seen"],
        ]

        result, totals = snuba.query(
            start=kwargs.get("start"),
            end=kwargs.get("end"),
            groupby=[tag],
            conditions=conditions,
            filter_keys=filters,
            aggregations=aggregations,
            orderby="-count",
            limit=limit,
            totals=True,
            referrer="tagstore.__get_tag_key_and_top_values",
        )

        if raise_on_empty and (not result or totals.get("count", 0) == 0):
            raise TagKeyNotFound if group_id is None else GroupTagKeyNotFound
        else:
            if group_id is None:
                key_ctor = TagKey
                value_ctor = TagValue
            else:
                key_ctor = functools.partial(GroupTagKey, group_id=group_id)
                value_ctor = functools.partial(GroupTagValue, group_id=group_id)

            top_values = [
                value_ctor(
                    key=key,
                    value=value,
                    times_seen=data["count"],
                    first_seen=parse_datetime(data["first_seen"]),
                    last_seen=parse_datetime(data["last_seen"]),
                )
                for value, data in six.iteritems(result)
            ]

            return key_ctor(
                key=key,
                values_seen=totals.get("values_seen", 0),
                count=totals.get("count", 0),
                top_values=top_values,
            )
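
The key_ctor/value_ctor switch above uses functools.partial to pre-bind group_id when the lookup is group-scoped. A tiny illustration with a namedtuple standing in for the real TagValue class (an assumption made purely for the sketch):

import functools
from collections import namedtuple

TagValue = namedtuple("TagValue", ["key", "value", "group_id"])  # stand-in class

# Bind group_id up front, as value_ctor does when group_id is not None.
value_ctor = functools.partial(TagValue, group_id=42)

print(value_ctor(key="browser", value="Firefox"))
# TagValue(key='browser', value='Firefox', group_id=42)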
Esempio n. 55
0
    def get_group_tag_keys_and_top_values(
            self,
            project_id,
            group_id,
            environment_id,
            user=None,
            keys=None,
            value_limit=TOP_VALUES_DEFAULT_LIMIT):
        # Similar to __get_tag_key_and_top_values except we get the top values
        # for all the keys provided. value_limit in this case means the number
        # of top values for each key, so the total rows returned should be
        # num_keys * value_limit.
        start, end = self.get_time_range()

        # First get totals and unique counts by key.
        keys_with_counts = self.get_group_tag_keys(project_id,
                                                   group_id,
                                                   environment_id,
                                                   keys=keys)

        # Then get the top values with first_seen/last_seen/count for each
        filters = {
            'project_id': [project_id],
        }
        if environment_id:
            filters['environment'] = [environment_id]
        if keys is not None:
            filters['tags_key'] = keys
        if group_id is not None:
            filters['issue'] = [group_id]

        aggregations = [
            ['count()', '', 'count'],
            ['min', SEEN_COLUMN, 'first_seen'],
            ['max', SEEN_COLUMN, 'last_seen'],
        ]

        values_by_key = snuba.query(
            start,
            end, ['tags_key', 'tags_value'],
            None,
            filters,
            aggregations,
            orderby='-count',
            limitby=[value_limit, 'tags_key'],
            referrer='tagstore.__get_tag_keys_and_top_values')

        # Then supplement the key objects with the top values for each.
        if group_id is None:
            value_ctor = TagValue
        else:
            value_ctor = functools.partial(GroupTagValue, group_id=group_id)

        for keyobj in keys_with_counts:
            key = keyobj.key
            values = values_by_key.get(key, [])
            keyobj.top_values = [
                value_ctor(
                    key=keyobj.key,
                    value=value,
                    times_seen=data['count'],
                    first_seen=parse_datetime(data['first_seen']),
                    last_seen=parse_datetime(data['last_seen']),
                ) for value, data in six.iteritems(values)
            ]

        return keys_with_counts
Esempio n. 56
0
    def decode(self, string):
        return parse_datetime(string)
Esempio n. 57
0
def gitstats_per_user(path,
                      recursive=False,
                      since=None,
                      until=None,
                      authors_emails={},
                      use_paths=None,
                      filterby_emails=None):
    """
    :param str path: Path to analyse.
                     If recursive is False,
                     it should be the path of a repository.
                     If recursive is True,
                     it should be the parent path of several repositories.
    :param bool recursive: Indicates whether the stats should be read from the path directory
                           or from the subdirectories in the path.

    :return:
        A dictionary with the format
        {
            'author': [{'date':..., 'files changed':..., 'insertions':..., 'deletions':...}],
            ...
        }
    """
    if use_paths is None:
        directories = [path] if not recursive else find_gitrepos(path)
    else:
        directories = use_paths

    authors = {}
    emails = {}
    for directory in directories:
        print(directory)

        command = ["git", "log", "--shortstat", "--all"]
        if since: command += ['--since', since.strftime('%Y.%m.%d')]
        if until: command += ['--until', until.strftime('%Y.%m.%d')]

        p = subprocess.Popen(command,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             cwd=directory)
        output, _ = p.communicate()
        output = output.decode()

        for commit in output.split('commit ')[1:]:
            lines = commit.split('\n')

            deletions = 0
            insertions = 0
            files_changed = 0

            for row in lines:

                if row.startswith('Author:'):
                    author = row[8:]
                    author = parse("{} <{}>", author)
                    author, email = author.fixed
                    emails[author] = email
                    if email in authors_emails:
                        author = authors_emails[email]
                elif row.startswith('Date:'):
                    date = parse_datetime(row[8:])
                elif ' changed' in row:
                    data = row.strip().split(',')
                    for v in data:
                        v = v.strip()

                        res = parse("{:d} deletions(-)", v)
                        if res:
                            deletions = res.fixed[0]
                        else:
                            res = parse("{:d} deletion(-)", v)
                            if res:
                                deletions = res.fixed[0]

                        res = parse("{:d} files changed", v)
                        if res:
                            files_changed = res.fixed[0]
                        else:
                            res = parse("{:d} file changed", v)
                            if res:
                                files_changed = res.fixed[0]

                        res = parse("{:d} insertions(+)", v)
                        if res:
                            insertions = res.fixed[0]
                        else:
                            res = parse("{:d} insertion(+)", v)
                            if res:
                                insertions = res.fixed[0]

            if filterby_emails and email not in filterby_emails:
                continue

            if author not in authors: authors[author] = []

            authors[author].append({
                'date': date,
                'files changed': files_changed,
                'deletions': deletions,
                'insertions': insertions
            })

        for author, commits in authors.items():
            authors[author] = sorted(commits, key=lambda x: x['date'])

    return authors, directories, emails
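
A hypothetical usage of gitstats_per_user, reducing the per-commit records into per-author totals; the path is a placeholder and the function is assumed to be importable from the module above:

# placeholder path; point this at a directory containing git checkouts
authors, directories, emails = gitstats_per_user("/path/to/repos", recursive=True)

totals = {}
for author, commits in authors.items():
    totals[author] = {
        "commits": len(commits),
        "insertions": sum(c["insertions"] for c in commits),
        "deletions": sum(c["deletions"] for c in commits),
    }

for author, stats in sorted(totals.items(), key=lambda kv: -kv[1]["commits"]):
    print(author, stats)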
Esempio n. 58
0
    def run(self, input_data, modify_light_metadata):
        """
        # Runs the stats generation phase
        # This shouldn't alter the columns themselves, but rather provide the `stats` metadata object and update the types for each column
        # A lot of information about the data distribution and quality will also be logged to the server in this phase
        """
        header = input_data.columns
        non_null_data = {}
        all_sampled_data = {}

        for column in header:
            non_null_data[column] = []
            all_sampled_data[column] = []

        empty_count = {}
        column_count = {}

        # we don't need to generate statistics over all of the data, so we subsample based on our accepted margin of error
        population_size = len(input_data.data_array)

        if population_size < 50:
            sample_size = population_size
        else:
            sample_size = int(
                calculate_sample_size(
                    population_size=population_size,
                    margin_error=CONFIG.DEFAULT_MARGIN_OF_ERROR,
                    confidence_level=CONFIG.DEFAULT_CONFIDENCE_LEVEL))
            if sample_size > 3000 and sample_size > population_size / 8:
                sample_size = min(round(population_size / 8), 3000)

        # get the indexes of randomly selected rows given the population size
        input_data_sample_indexes = random.sample(range(population_size),
                                                  sample_size)
        self.log.info(
            'population_size={population_size},  sample_size={sample_size}  {percent:.2f}%'
            .format(population_size=population_size,
                    sample_size=sample_size,
                    percent=(sample_size / population_size) * 100))

        for sample_i in input_data_sample_indexes:
            row = input_data.data_array[sample_i]
            for i, val in enumerate(row):
                column = header[i]
                value = cast_string_to_python_type(val)
                if column not in empty_count:
                    empty_count[column] = 0
                    column_count[column] = 0
                if value is None:
                    empty_count[column] += 1
                else:
                    non_null_data[column].append(value)
                all_sampled_data[column].append(value)
                column_count[column] += 1
        stats = {}

        col_data_dict = {}
        for i, col_name in enumerate(non_null_data):
            col_data = non_null_data[col_name]  # all rows in just one column
            full_col_data = all_sampled_data[col_name]
            data_type, curr_data_subtype, data_type_dist, data_subtype_dist, additional_info, column_status = self._get_column_data_type(
                col_data, i, input_data.data_array, col_name)

            if column_status == 'Column empty':
                if modify_light_metadata:
                    self.transaction.lmd['malformed_columns']['names'].append(
                        col_name)
                    self.transaction.lmd['malformed_columns'][
                        'indices'].append(i)
                continue

            if data_type == DATA_TYPES.DATE:
                for i, element in enumerate(col_data):
                    if str(element) in [
                            str(''),
                            str(None),
                            str(False),
                            str(np.nan), 'NaN', 'nan', 'NA', 'null'
                    ]:
                        col_data[i] = None
                    else:
                        try:
                            col_data[i] = int(
                                parse_datetime(element).timestamp())
                        except:
                            self.log.warning(
                                'Could not convert string to date and it was expected, current value {value}'
                                .format(value=element))
                            col_data[i] = None

            if data_type == DATA_TYPES.NUMERIC or data_type == DATA_TYPES.DATE:
                newData = []

                for value in col_data:
                    if value != '' and value != '\r' and value != '\n':
                        newData.append(value)

                col_data = [
                    clean_float(i) for i in newData if str(i) not in [
                        '',
                        str(None),
                        str(False),
                        str(np.nan), 'NaN', 'nan', 'NA', 'null'
                    ]
                ]

                y, x = np.histogram(col_data, 50, density=False)
                x = (x + np.roll(x, -1))[:-1] / 2.0
                x = x.tolist()
                y = y.tolist()

                xp = []

                if len(col_data) > 0:
                    max_value = max(col_data)
                    min_value = min(col_data)
                    mean = np.mean(col_data)
                    median = np.median(col_data)
                    var = np.var(col_data)
                    skew = st.skew(col_data)
                    kurtosis = st.kurtosis(col_data)

                    inc_rate = 0.1
                    initial_step_size = abs(max_value - min_value) / 100

                    xp += [min_value]
                    i = min_value + initial_step_size

                    while i < max_value:

                        xp += [i]
                        i_inc = abs(i - min_value) * inc_rate
                        i = i + i_inc
                else:
                    max_value = 0
                    min_value = 0
                    mean = 0
                    median = 0
                    var = 0
                    skew = 0
                    kurtosis = 0
                    xp = []

                # True when any sampled value has a fractional part; unlike max()
                # over a list comprehension, any() also handles an empty column.
                is_float = any(int(i) != i for i in col_data)

                col_stats = {
                    'data_type': data_type,
                    'data_subtype': curr_data_subtype,
                    "mean": mean,
                    "median": median,
                    "variance": var,
                    "skewness": skew,
                    "kurtosis": kurtosis,
                    "max": max_value,
                    "min": min_value,
                    "is_float": is_float,
                    "histogram": {
                        "x": x,
                        "y": y
                    },
                    "percentage_buckets": xp
                }
            elif data_type == DATA_TYPES.CATEGORICAL:
                all_values = []
                for row in input_data.data_array:
                    all_values.append(row[i])

                histogram = Counter(all_values)
                all_possible_values = histogram.keys()

                col_stats = {
                    'data_type': data_type,
                    'data_subtype': curr_data_subtype,
                    "histogram": {
                        "x": list(histogram.keys()),
                        "y": list(histogram.values())
                    }
                    #"percentage_buckets": list(histogram.keys())
                }

            # @TODO This is probably wrong, look into it a bit later
            else:
                # see if its a sentence or a word
                is_full_text = True if curr_data_subtype == DATA_SUBTYPES.TEXT else False
                dictionary, histogram = self._get_words_dictionary(
                    col_data, is_full_text)

                # if no words, then no dictionary
                if len(col_data) == 0:
                    dictionary_available = False
                    dictionary_lenght_percentage = 0
                    dictionary = []
                else:
                    dictionary_available = True
                    dictionary_lenght_percentage = len(dictionary) / len(
                        col_data) * 100
                    # if the number of unique values is too large then treat it as text
                    if dictionary_lenght_percentage > 10 and len(col_data) > 50 \
                            and not is_full_text:
                        dictionary = []
                        dictionary_available = False
                col_stats = {
                    'data_type': data_type,
                    'data_subtype': curr_data_subtype,
                    "dictionary": dictionary,
                    "dictionaryAvailable": dictionary_available,
                    "dictionaryLenghtPercentage": dictionary_lenght_percentage,
                    "histogram": histogram
                }
            stats[col_name] = col_stats
            stats[col_name]['data_type_dist'] = data_type_dist
            stats[col_name]['data_subtype_dist'] = data_subtype_dist
            stats[col_name]['column'] = col_name
            stats[col_name]['empty_cells'] = empty_count[col_name]
            stats[col_name]['empty_percentage'] = empty_count[
                col_name] * 100 / column_count[col_name]
            if 'separator' in additional_info:
                stats[col_name]['separator'] = additional_info['separator']
            col_data_dict[col_name] = col_data

        for i, col_name in enumerate(all_sampled_data):
            if col_name in self.transaction.lmd['malformed_columns']['names']:
                continue

            stats[col_name].update(
                self._compute_duplicates_score(stats, all_sampled_data,
                                               col_name))
            stats[col_name].update(
                self._compute_empty_cells_score(stats, all_sampled_data,
                                                col_name))
            #stats[col_name].update(self._compute_clf_based_correlation_score(stats, all_sampled_data, col_name))
            stats[col_name].update(
                self._compute_data_type_dist_score(stats, all_sampled_data,
                                                   col_name))
            stats[col_name].update(
                self._compute_z_score(stats, col_data_dict, col_name))
            stats[col_name].update(
                self._compute_lof_score(stats, col_data_dict, col_name))
            stats[col_name].update(
                self._compute_similariy_score(stats, all_sampled_data,
                                              col_name))
            stats[col_name].update(
                self._compute_value_distribution_score(stats, all_sampled_data,
                                                       col_name))

            stats[col_name].update(
                self._compute_consistency_score(stats, col_name))
            stats[col_name].update(
                self._compute_redundancy_score(stats, col_name))
            stats[col_name].update(
                self._compute_variability_score(stats, col_name))

            stats[col_name].update(
                self._compute_data_quality_score(stats, col_name))

        total_rows = len(input_data.data_array)

        if modify_light_metadata:
            self.transaction.lmd['column_stats'] = stats
            self.transaction.lmd['data_preparation'][
                'total_row_count'] = total_rows
            self.transaction.lmd['data_preparation']['test_row_count'] = len(
                input_data.test_indexes)
            self.transaction.lmd['data_preparation']['train_row_count'] = len(
                input_data.train_indexes)
            self.transaction.lmd['data_preparation'][
                'validation_row_count'] = len(input_data.validation_indexes)

        self._log_interesting_stats(stats)
        return stats
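
The subsampling step above depends on project helpers (calculate_sample_size and the CONFIG margins). As a rough standalone equivalent, a Cochran-style estimate with a finite-population correction looks like the following; this is an illustration of the idea, not the project's exact formula:

import math
import random

def estimate_sample_size(population_size, margin_error=0.05, confidence_level=0.95):
    # Illustrative Cochran formula with finite-population correction; this is an
    # assumption about the kind of math calculate_sample_size performs, not a copy of it.
    z = {0.90: 1.645, 0.95: 1.96, 0.99: 2.576}[confidence_level]
    p = 0.5  # most conservative proportion
    n0 = (z ** 2) * p * (1 - p) / (margin_error ** 2)
    n = n0 / (1 + (n0 - 1) / population_size)
    return int(math.ceil(n))

population_size = 100000
sample_size = estimate_sample_size(population_size)
sample_indexes = random.sample(range(population_size), min(sample_size, population_size))
print(sample_size, len(sample_indexes))  # 383 383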
Esempio n. 59
0
def _handle_tweet(url):
    """http*://twitter.com/*/statuses/*"""
    tweet_url = "https://api.twitter.com/1.1/statuses/show.json?id=%s&include_entities=false&tweet_mode=extended"
    test = re.match(r"https?://.*?twitter\.com\/(\w+)/status(es)?/(\d+)", url)
    if not test:
        return
    # matches for unique tweet id string
    infourl = tweet_url % test.group(3)

    bearer_token = config.get("twitter_bearer")
    if not bearer_token:
        log.info(
            "Use util/twitter_application_auth.py to request a bearer token for tweet handling"
        )
        return _parse_tweet_from_src(url)
    headers = {'Authorization': 'Bearer ' + bearer_token}

    data = bot.get_url(infourl, headers=headers)

    if not data:
        log.warning("Empty response from Twitter api")
        return

    tweet = data.json()
    if 'errors' in tweet:
        for error in tweet['errors']:
            log.warning("Error reading tweet (code %s) %s" %
                        (error['code'], error['message']))
        return

    text = tweet['full_text'].strip()
    user = tweet['user']['screen_name']
    name = tweet['user']['name'].strip()
    verified = tweet['user']['verified']

    retweets = tweet['retweet_count']
    favorites = tweet['favorite_count']
    created_date = parse_datetime(tweet['created_at'])

    def twit_timestr(dt):
        """A coarse timestr function"""

        months = [
            'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep',
            'Oct', 'Nov', 'Dec'
        ]
        diff = datetime.now(tzutc()) - dt
        if diff.days > 30 * 6:
            return "%i %s %i" % (dt.day, months[dt.month - 1], dt.year)
        elif diff.days > 30:
            return "%i %s" % (dt.day, months[dt.month - 1])
        elif diff.days:
            return "%id" % diff.days
        elif diff.seconds > 3600:
            return "%ih" % (diff.seconds / 3600)
        elif diff.seconds > 60:
            return "%im" % (diff.seconds / 60)
        else:
            return "now"

    user = "******".format(user)
    if verified:
        user = "******".format(user)

    tweet = "{0} ({1}) {2}: {3} [♻ {4} ♥ {5}]".format(
        name, user, twit_timestr(created_date), text, retweets, favorites)
    return tweet
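
The nested twit_timestr helper is awkward to exercise on its own because it reads the current time implicitly. A deterministic variant that takes now as an argument (a change made only for this sketch) shows the same coarse bucketing:

from datetime import datetime, timedelta, timezone

MONTHS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
          'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

def coarse_timestr(dt, now):
    # Same bucketing as twit_timestr above, with `now` injected for testability.
    diff = now - dt
    if diff.days > 30 * 6:
        return "%i %s %i" % (dt.day, MONTHS[dt.month - 1], dt.year)
    elif diff.days > 30:
        return "%i %s" % (dt.day, MONTHS[dt.month - 1])
    elif diff.days:
        return "%id" % diff.days
    elif diff.seconds > 3600:
        return "%ih" % (diff.seconds // 3600)
    elif diff.seconds > 60:
        return "%im" % (diff.seconds // 60)
    return "now"

now = datetime(2020, 6, 1, tzinfo=timezone.utc)
print(coarse_timestr(now - timedelta(hours=5), now))   # 5h
print(coarse_timestr(now - timedelta(days=3), now))    # 3d
print(coarse_timestr(now - timedelta(days=400), now))  # 28 Apr 2019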
Esempio n. 60
0
def share_daily_price(request, start_date, end_date=None):
    api_confirm_data = __validate_api_key(request)
    if api_confirm_data:
        return api_confirm_data

    # check whether the scraper should be used
    if settings.MY_SETTINGS.get("use_scraper"):
        errors: list = []

        # try to scrape the data; validate the date first
        try:
            from_date: str = str(parse_datetime(start_date).date())

            if end_date:
                to_date: str = str(parse_datetime(end_date).date())
                # API call to grab data for a range of dates
                share_data = nepse_web_scraper.get_nepse_data(
                    start_date=from_date, end_date=to_date)
            else:
                # API call to grab data for a single date
                share_data = nepse_web_scraper.get_nepse_data_for_date(
                    date=from_date)

            # return JSON
            return JsonResponse(share_data, encoder="utf-8")
        except Exception as e:
            # handle the error
            print(e)
            errors.append("Invalid start or end date")
        return JsonResponse({"errors": errors})
    else:
        # grab data from the db and render it
        from_date: str = str(parse_datetime(start_date).date())

        company_transaction_cache = share_manager_models.ShareCompanyDetail.objects.all(
        ).select_related("company_name")
        errors: list = []

        try:
            if end_date:
                try:
                    to_date: str = str(parse_datetime(end_date).date())

                    company_transaction = company_transaction_cache.filter(
                        Q(company_transaction_date__gte=from_date)
                        & Q(company_transaction_date__lte=to_date))
                except Exception as e:
                    print(e)
                    errors.append("invalid date")
                    return JsonResponse({"errors": errors})
            else:
                company_transaction = company_transaction_cache.filter(
                    company_transaction_date=from_date)

            company_transaction_serializer = share_manager_serializers.ShareCompanyDetailSerializer(
                company_transaction,
                many=True,
            )

            return JsonResponse({"data": company_transaction_serializer.data})
        except Exception as e:
            print(e)
            errors.append(
                "value has an invalid date format. It must be in YYYY-MM-DD format."
            )
            return JsonResponse({"errors": errors})
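
Much of the view above reduces to normalising a URL-captured date string into YYYY-MM-DD with parse_datetime and rejecting anything unparsable. A small helper sketch of that pattern, again assuming parse_datetime is dateutil.parser.parse:

from dateutil.parser import parse as parse_datetime  # assumed binding

def normalize_date(value):
    """Return the date portion as 'YYYY-MM-DD', or None if it cannot be parsed."""
    try:
        return str(parse_datetime(value).date())
    except (ValueError, OverflowError, TypeError):
        return None

print(normalize_date("2020-01-15"))   # 2020-01-15
print(normalize_date("15 Jan 2020"))  # 2020-01-15
print(normalize_date("not-a-date"))   # None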