Ejemplo n.º 1
0
def test_execute_skipping(tmp_path: Path, caplog: LogCaptureFixture) -> None:
    """Check that executing an already-executed batch entry is skipped.

    The batch is executed once, reloaded from its dump file, and executed
    again. The second run must log exactly one "Skipping" record and must
    leave the modification times of the meta and data files unchanged.
    """
    dump_path = tmp_path / "batch.jsonl"
    out_dir = tmp_path / "out"

    # First run: the request is actually executed.
    first_run = Batch()
    first_run.append(Search("trump", max_tweets=50))
    first_run.dump(dump_path)
    assert first_run.execute(out_dir)
    _assert_results_dir_structure(out_dir, list(first_run))

    entry = first_run[0]
    meta_path = out_dir / entry.meta_file_name
    data_path = out_dir / entry.data_file_name
    meta_mtime_before = meta_path.stat().st_mtime_ns
    data_mtime_before = data_path.stat().st_mtime_ns

    # Second run: reload from the dumped batch file so that the batch entry
    # IDs match those of the first run; the entry should be skipped.
    second_run = Batch()
    second_run.load(dump_path)
    caplog.clear()
    assert second_run.execute(out_dir)
    # Matches against the raw (unformatted) log message of each record.
    skip_records = [rec for rec in caplog.records if "Skipping" in rec.msg]
    assert len(skip_records) == 1
    _assert_results_dir_structure(out_dir, list(second_run))
    meta_mtime_after = meta_path.stat().st_mtime_ns
    data_mtime_after = data_path.stat().st_mtime_ns

    # Neither result file may have been touched by the second run.
    assert meta_mtime_before == meta_mtime_after
    assert data_mtime_before == data_mtime_after
Ejemplo n.º 2
0
 def run(self) -> None:
     """Run the built request directly or queue it in a batch file.

     When ``self.to_batch`` is falsy, the request is executed and each
     resulting tweet is written to stdout as one JSON document per line.
     Otherwise the request is submitted to a batch that is loaded from
     ``self.to_batch`` (if that file already exists) and dumped back to it.
     """
     request = self._build_request()

     if not self.to_batch:
         # No batch file given: stream the results straight to stdout.
         for tweet in request.request():
             serialized = json.dumps(tweet.to_json())
             sys.stdout.write(serialized + "\n")
         return

     batch = Batch()
     # Extend an existing batch file rather than overwriting its entries.
     if self.to_batch.exists():
         batch.load(self.to_batch)
     self._batch_submit(batch, request)
     batch.dump(self.to_batch)
Ejemplo n.º 3
0
def test_dump_load_single(request_: Request, tmp_path: Path) -> None:
    """Check that a single-entry batch survives a dump/load round trip.

    The dump file must contain exactly one non-empty line, and a batch
    loaded from it must yield the same entries as the original.
    """
    dump_path = tmp_path / "batch.jsonl"

    original = Batch()
    original.append(request_)
    original.dump(dump_path)

    dumped_lines = list(read_lines_file(dump_path))
    assert len(dumped_lines) == 1
    assert len(dumped_lines[0]) != 0

    restored = Batch()
    restored.load(dump_path)
    assert list(original) == list(restored)
Ejemplo n.º 4
0
def test_dump_load_multiple(num_batch_entries: int, tmp_path: Path) -> None:
    """Check that a multi-entry batch survives a dump/load round trip.

    ``num_batch_entries`` distinct searches are appended. The dump file must
    contain one non-empty line per entry, and a batch loaded from it must
    yield the same entries as the original.
    """
    dump_path = tmp_path / "batch.jsonl"

    original = Batch()
    for offset in range(num_batch_entries):
        # Values run from 1 to num_batch_entries so every entry differs.
        value = offset + 1
        original.append(Search(str(value), max_tweets=value, batch_size=value))
    original.dump(dump_path)

    dumped_lines = list(read_lines_file(dump_path))
    assert len(dumped_lines) == num_batch_entries
    assert all(len(dumped) != 0 for dumped in dumped_lines)

    restored = Batch()
    restored.load(dump_path)
    assert list(original) == list(restored)
Ejemplo n.º 5
0
def test_correct_call_to_batch(
    request_: Request,
    capsys: CaptureFixture,
    tmp_path: Path,
) -> None:
    """Check that a call with ``to_batch`` writes one pending batch entry.

    Nothing may be printed to stdout, and the batch file must hold a single
    entry for the request that has an ID but no completion time or
    exception.
    """
    dump_path = tmp_path / "batch.jsonl"

    main(*_make_args(request_, to_batch=dump_path))

    captured = capsys.readouterr()
    assert captured.out == ""

    loaded = Batch()
    loaded.load(dump_path)
    assert len(loaded) == 1

    entry = loaded[0]
    assert entry.request == request_
    assert entry.id
    assert entry.completed_at is None
    assert entry.exception is None
Ejemplo n.º 6
0
def test_correct_call_to_batch_daily(capsys: CaptureFixture,
                                     tmp_path: Path) -> None:
    """Check that ``daily=True`` with ``to_batch`` queues the daily requests.

    Nothing may be printed to stdout. The batch file must hold as many
    entries as there are days between ``since`` and ``until``, matching
    ``request.to_daily_requests()`` in order, each with an ID but no
    completion time or exception.
    """
    dump_path = tmp_path / "batch.jsonl"
    request = Search("trump", since=date(2019, 1, 1), until=date(2019, 2, 1))

    # Needed for type checking.
    assert request.until is not None and request.since is not None

    main(*_make_args(request, to_batch=dump_path, daily=True))

    captured = capsys.readouterr()
    assert captured.out == ""

    loaded = Batch()
    loaded.load(dump_path)
    expected_count = (request.until - request.since).days
    assert len(loaded) == expected_count

    daily_requests = request.to_daily_requests()
    for entry, daily_request in zip(loaded, daily_requests):
        assert entry.request == daily_request
        assert entry.id
        assert entry.completed_at is None
        assert entry.exception is None
Ejemplo n.º 7
0
def test_correct_call_to_batch_exists(
    old_request: Request,
    new_request: Request,
    capsys: CaptureFixture,
    tmp_path: Path,
) -> None:
    """Check that ``to_batch`` appends to an already existing batch file.

    A batch file holding ``old_request`` is created first. After the call
    with ``new_request``, nothing may be printed to stdout and the file must
    hold both requests in that order, each with an ID but no completion time
    or exception.
    """
    dump_path = tmp_path / "batch.jsonl"

    # Seed the batch file with the pre-existing request.
    seeded = Batch()
    seeded.append(old_request)
    seeded.dump(dump_path)

    main(*_make_args(new_request, to_batch=dump_path))

    captured = capsys.readouterr()
    assert captured.out == ""

    loaded = Batch()
    loaded.load(dump_path)
    assert len(loaded) == 2

    expected_requests = [old_request, new_request]
    for entry, expected in zip(loaded, expected_requests):
        assert entry.request == expected
        assert entry.id
        assert entry.completed_at is None
        assert entry.exception is None
Ejemplo n.º 8
0
 def run(self) -> None:
     """Load the batch from ``self.batch_file`` and execute it.

     Results are written to ``self.results_dir``. The value returned by
     ``Batch.execute`` is not inspected here.
     """
     pending = Batch()
     pending.load(self.batch_file)
     pending.execute(self.results_dir)