Example #1
def _mock_posts(request, context) -> str:
    """Mock crimson hexagon api call for requests_mock."""
    params = parse_qs(urlparse(request.url).query)

    start_date = dateutil.parser.parse(params['startDate'][0])
    end_date = dateutil.parser.parse(params['endDate'][0])

    posts = get_mock_data(start_date, end_date)

    results = ','.join([
        '{ "url": "http://twitter.com/%s/status/%s"}' %
        (p['author'], p['post_id']) for p in posts
    ])

    context.status_code = 200
    context.headers = {'Content-Type': 'application/json; charset=UTF-8'}

    response_json = \
        """
        {
          "results": [%s],
          "resultsPage": 0,
          "resultsPageSize": 10,
          "resultsTotal": 6563,
          "startDate": "%s",
          "endDate": "%s"
        }
        """ % (results, start_date, end_date)

    return response_json
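
# Usage sketch (not part of the original example): requests_mock accepts a callable
# for `text` and invokes it as callback(request, context), which is exactly the
# signature _mock_posts has. The endpoint URL and dates below are placeholders,
# not the real Crimson Hexagon API.
import requests
import requests_mock

with requests_mock.Mocker() as m:
    m.get(requests_mock.ANY, text=_mock_posts)

    r = requests.get('https://api.example.com/posts?startDate=2019-01-01&endDate=2019-01-02')
    assert r.json()['resultsPageSize'] == 10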
Example #2
    def fetch_posts_from_api(self, query: str, start_date: datetime, end_date: datetime) -> list:
        """Return posts from a csv that are within the given date range."""
        if self.mock_enabled:
            query = self._get_csv_string_from_dicts(get_mock_data())

        all_posts = self._get_dicts_from_csv_string(query)

        required_fields = ['content', 'author', 'publish_date']
        for post in all_posts:
            for field in required_fields:
                if field not in post:
                    raise McPostsGenericDataException(f"Missing required field: {field}")
            
            post['data'] = {}
            if 'channel' not in post:
                post['channel'] = post['author']

            if 'post_id' not in post:
                post['post_id'] = uuid.uuid4().hex

            post['post_id'] = str(post['post_id'])

        posts = filter_posts_for_date_range(all_posts, start_date, end_date)

        return posts
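
# filter_posts_for_date_range() is defined elsewhere in the project; this is a
# plausible standalone sketch, assuming each post carries a parseable
# 'publish_date' string.
import dateutil.parser
from datetime import datetime

def filter_posts_for_date_range(all_posts: list, start_date: datetime, end_date: datetime) -> list:
    """Keep only posts whose publish_date falls within [start_date, end_date]."""
    filtered = []
    for post in all_posts:
        publish_date = dateutil.parser.parse(post['publish_date'])
        if start_date <= publish_date <= end_date:
            filtered.append(post)
    return filtered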
Example #3
def _mock_pushshift(request, context) -> str:
    """Mock response from pushshift based on posts.get_mock_data."""
    filters = request.json()['query']['function_score']['query']['bool']['must']

    start_date = None
    end_date = None
    for es_filter in filters:
        if 'range' not in es_filter:
            continue

        created_utc = es_filter['range']['created_utc']

        if 'gte' in created_utc:
            start_date = datetime.datetime.fromtimestamp(created_utc['gte'])
        elif 'lt' in created_utc:
            end_date = datetime.datetime.fromtimestamp(created_utc['lt'])

    assert (start_date is not None) and (end_date is not None)

    base_dir = os.path.dirname(os.path.realpath(__file__))
    response_template_file = base_dir + "/pushshift_response.json.jinja"

    posts = get_mock_data(start_date, end_date)

    for post in posts:
        post['publish_epoch'] = dateutil.parser.parse(post['publish_date']).timestamp()

    with open(response_template_file) as f:
        template = Template(f.read())
        response_json = template.render(posts=posts)

    context.status_code = 200
    context.headers = {'Content-Type': 'application/json; charset=UTF-8'}

    return response_json
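
# For reference, the date extraction above implies an Elasticsearch-style request
# body. A minimal query this mock would accept looks like the dict below; only the
# keys the mock actually reads matter, everything else is illustrative.
example_pushshift_query = {
    "query": {
        "function_score": {
            "query": {
                "bool": {
                    "must": [
                        {"match": {"body": "example search term"}},       # ignored by the mock
                        {"range": {"created_utc": {"gte": 1546300800}}},  # start date, epoch seconds
                        {"range": {"created_utc": {"lt": 1546387200}}},   # end date, epoch seconds
                    ]
                }
            }
        }
    }
}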
Example #4
    def fetch_posts_from_api(
        self,
        query: str,
        start_date: datetime,
        end_date: datetime,
        sample: Optional[int] = None,
        page_size: Optional[int] = None,
    ) -> list:
        """Return posts from a csv that are within the given date range."""
        db = mediawords.db.connect_to_db()

        assert sample is None, "Sampling is not implemented."
        assert page_size is None, "Page size limiting is not supported."

        if self.mock_enabled:
            query = self._insert_mock_data(db, get_mock_data())

        table = query

        if re.search(r'[^a-zA-Z0-9_]', table):
            raise McPostgresGenericDataException(
                f'illegal table name: {table}')

        posts = db.query(
            f"""
            select content, publish_date, author, post_id, channel
                from {table} 
                where publish_date::timestamp between %(a)s and %(b)s
            """, {
                'a': start_date,
                'b': end_date
            }).hashes()

        return posts
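
# Because `table` is interpolated directly into the SQL via an f-string, the
# character whitelist above is the only guard against injection. A quick
# illustration of that check in isolation (helper name is hypothetical):
import re

def _is_safe_table_name(table: str) -> bool:
    """Allow only letters, digits, and underscores in a table name."""
    return not re.search(r'[^a-zA-Z0-9_]', table)

assert _is_safe_table_name('mock_generic_posts')
assert not _is_safe_table_name('posts; drop table users')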
Example #5
    def _get_mock_json(self, start_date: datetime, end_date: datetime):
        """return json in googler format derived from get_mock_data()."""
        mock_data = get_mock_data(start_date, end_date)

        json_data = []
        for d in mock_data:
            json_data.append({
                'abstract': d['content'],
                'url': 'http://foo.bar/' + d['post_id'],
                'title': d['content'],
                'metadata': dateutil.parser.parse(d['publish_date']).strftime('%b %e, %Y,'),
            })

        return encode_json(json_data)
Example #6
    def test_mock_data(self, query: str = '') -> None:
        """Run test of object using mock data.

        This should work on any class, as long as fetch_posts() is implemented to return the data from
        get_mock_data() when mock_enabled = True.
        """
        self.mock_enabled = True

        expected_posts = get_mock_data()

        start_date = dateutil.parser.parse(expected_posts[0]['publish_date'])
        end_date = dateutil.parser.parse(expected_posts[-1]['publish_date'])

        got_posts = self.fetch_posts(query, start_date, end_date)

        assert len(got_posts) == len(expected_posts)
        for i, got_post in enumerate(got_posts):
            self.validate_mock_post(got_post, expected_posts[i])
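
# validate_mock_post() is implemented by the concrete test class; this is a sketch
# of the field-by-field comparison it presumably performs, written here as a
# standalone function. The field names are taken from the examples above.
import dateutil.parser

def validate_mock_post(got_post: dict, expected_post: dict) -> None:
    """Assert that a fetched post matches the corresponding mock post."""
    for field in ('content', 'author', 'channel'):
        assert got_post[field] == expected_post[field], f"mismatched field: {field}"

    got_date = dateutil.parser.parse(got_post['publish_date'])
    expected_date = dateutil.parser.parse(expected_post['publish_date'])
    assert got_date == expected_date, "mismatched publish_date"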
Example #7
def _mock_ch_posts(request, context) -> str:
    """Mock crimson hexagon api call for requests_mock."""
    params = parse_qs(urlparse(request.url).query)

    start_date = dateutil.parser.parse(params['start'][0])
    end_date = dateutil.parser.parse(params['end'][0])

    posts = get_mock_data(start_date, end_date)

    ch_posts = []
    for post in posts:
        url = 'http://twitter.com/%s/status/%s' % (post['author'],
                                                   post['post_id'])
        p = """\
{
  "url": "%s",
  "title": "",
  "type": "Twitter",
  "language": "en",
  "assignedCategoryId": 25841371963,
  "assignedEmotionId": 25841371954,
  "categoryScores": [
    {
      "categoryId": 25841371962,
      "categoryName": "Basic Neutral",
      "score": 0
    },
    {
      "categoryId": 25841371963,
      "categoryName": "Basic Negative",
      "score": 1
    },
    {
      "categoryId": 25841371960,
      "categoryName": "Basic Positive",
      "score": 0
    }
  ],
  "emotionScores": [
    {
      "emotionId": 25841371954,
      "emotionName": "Disgust",
      "score": 0.4
    },
    {
      "emotionId": 25841371955,
      "emotionName": "Joy",
      "score": 0.01
    },
    {
      "emotionId": 25841371958,
      "emotionName": "Neutral",
      "score": 0.01
    },
    {
      "emotionId": 25841371959,
      "emotionName": "Fear",
      "score": 0.09
    },
    {
      "emotionId": 25841371956,
      "emotionName": "Sadness",
      "score": 0.22
    },
    {
      "emotionId": 25841371957,
      "emotionName": "Anger",
      "score": 0.16
    },
    {
      "emotionId": 25841371961,
      "emotionName": "Surprise",
      "score": 0.12
    }
  ]
}\
        """ % url
        ch_posts.append(p)

    context.status_code = 200
    context.headers = {'Content-Type': 'application/json; charset=UTF-8'}

    response_json = '{"status": "success", "posts":[%s]}' % ',\n'.join(ch_posts)

    return response_json
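
# Sanity-check sketch (not part of the original example): exercising the callback
# directly with stand-in request/context objects to confirm the assembled payload
# is valid JSON. The URL, dates, and monitor id are placeholders, and get_mock_data()
# must cover the chosen date range.
import json

class _FakeContext:
    status_code = None
    headers = None

class _FakeRequest:
    url = 'https://api.example.com/monitor/posts?id=123&start=2019-01-01&end=2019-01-02'

payload = json.loads(_mock_ch_posts(_FakeRequest(), _FakeContext()))
assert payload['status'] == 'success'
assert all('url' in p for p in payload['posts'])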