Example 1
import signal
import unittest
from datetime import timedelta

from app.job import Job, signal_handler


class TestTypeImport(unittest.TestCase):
    """
    Test TypeImport
    """
    def setUp(self):
        self.interval = 1
        self.periodic = ''
        self.execute = True
        self.job = Job(interval=timedelta(seconds=self.interval),
                       execute=self.periodically)

    def test_run(self):
        self.job.start()
        with self.assertRaises(Exception):
            signal_handler(signal.SIGTERM, None)  # the module-level handler raises ProgramKilled
        self.job.stop()

    def periodically(self):
        self.periodic = 'run test'
        return 1

    def periodically_error(self):
        raise TypeError('lets see if this works')

    def test_job_error(self):
        job = Job(interval=timedelta(seconds=self.interval),
                  execute=self.periodically_error)
        job.start()
        signal.signal(signal.SIGTERM, signal_handler)
        signal.signal(signal.SIGINT, signal_handler)
        with self.assertRaises(Exception):
            self.periodically_error()
        job.stop()
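The Job class these tests exercise isn't shown here, but the constructor signature (an interval plus an execute callable) and Example 8's import of Job, ProgramKilled, and signal_handler from app.job point to the common timer-thread pattern. A minimal sketch along those lines, not the project's actual code:

import threading
from datetime import timedelta


class ProgramKilled(Exception):
    """Raised by signal_handler so the main loop can run cleanup code."""
    pass


def signal_handler(signum, frame):
    raise ProgramKilled


class Job(threading.Thread):
    """Calls `execute` every `interval` until stopped."""

    def __init__(self, interval: timedelta, execute, *args, **kwargs):
        super().__init__()
        self.stopped = threading.Event()
        self.interval = interval
        self.execute = execute
        self.args = args
        self.kwargs = kwargs

    def stop(self):
        self.stopped.set()
        self.join()

    def run(self):
        # Event.wait() returns False on timeout, so the body fires once per interval
        while not self.stopped.wait(self.interval.total_seconds()):
            self.execute(*self.args, **self.kwargs)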
Example 2
    def get_job_by_id(self, _id: str) -> Job:
        """
        Returns the job matching the given id.
        :param _id: id as str
        :return: job as Job object
        """
        _job = list(filter(lambda x: x["id"] == _id, self.jobs))
        if len(_job) != 1:
            raise Exception(f"ERROR: expected exactly one job with id {_id}, found {len(_job)}")

        return Job(_job[0])
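Note that the raise also fires when an id appears more than once, so callers should read the Exception as "not exactly one match". A self-contained usage sketch, with JobStore as a hypothetical stand-in for the class that owns this method:

class JobStore:
    """Hypothetical stand-in for the class that owns get_job_by_id."""

    def __init__(self, jobs):
        self.jobs = jobs  # list of dicts, each carrying an "id" key

    def get_job_by_id(self, _id):
        _job = [j for j in self.jobs if j["id"] == _id]
        if len(_job) != 1:
            raise Exception(f"ERROR: expected exactly one job with id {_id}")
        return _job[0]


store = JobStore([{"id": "1", "title": "Python Developer"}])
try:
    job = store.get_job_by_id("2")
except Exception as err:
    print(err)  # ERROR: expected exactly one job with id 2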
Example 3
def get_tweets():
    bq_service = BigQueryService()
    print("LIMIT:", LIMIT)
    job = Job()

    tweets = []
    job.start()
    for row in bq_service.fetch_labeled_tweets_in_batches(limit=LIMIT):
        tweets.append(dict(row))
        job.counter += 1
        if job.counter % BATCH_SIZE == 0:
            job.progress_report()
    job.end()
    print("FETCHED TWEETS:", fmt_n(len(tweets)))
    return DataFrame(tweets)
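fmt_n isn't defined in the snippet; it is presumably a small thousands-separator helper along these lines:

def fmt_n(large_number):
    """Formats a number with comma separators, e.g. 1000000 -> '1,000,000'."""
    return f"{large_number:,.0f}"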
Example 4
def download_data():
    job = Job()
    bq_service = BigQueryService()

    job.start()
    records = []
    for row in bq_service.fetch_user_details_vq(limit=LIMIT):
        #print(row)
        records.append(dict(row))

        job.counter += 1
        if job.counter % BATCH_SIZE == 0:
            job.progress_report()
    job.end()

    return DataFrame(records)
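The Job here differs from the timer thread in Example 1: it is a progress monitor with a counter, start/end timestamps, and periodic reporting. A minimal sketch consistent with how Examples 3 and 4 use it; the real class may track more:

from datetime import datetime


class Job:
    """Tracks elapsed time and record counts for a batch download."""

    def __init__(self):
        self.start_at = None
        self.end_at = None
        self.counter = 0

    def start(self):
        self.start_at = datetime.now()
        self.counter = 0
        print("JOB STARTED AT:", self.start_at)

    def progress_report(self):
        elapsed = (datetime.now() - self.start_at).total_seconds()
        rate = self.counter / elapsed if elapsed else 0
        print(f"  PROCESSED {self.counter:,} RECORDS ({round(rate)}/sec)")

    def end(self):
        self.end_at = datetime.now()
        print("JOB ENDED AT:", self.end_at)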
Example 6
import json
from unittest.mock import patch

from app.job import Job


class TestJob:
    test_data = {
        "id": "90038429",
        "title": "Python Developer",
        "company": "Durlston Partners London Limited",
        "contract": "Permanent",
        "age": "Expires in 1 day",
        "date": "27/04/2022",
        "location": "EC1, City of London",
        "link": "https://www.google.com",
        "agency": "CW Jobs",
        "summary": "They are... ",
        "interested": "N",
        "reviewed": "N",
        "email": "N"
    }

    def setup_method(self):
        self.job = Job(self.test_data)

    @patch("app.job.requests.put")
    def test_put_job(self, mock):
        self.job.put_job()

        assert json.loads(mock.call_args.kwargs['data']) == self.test_data

    def test_update_job(self):
        self.job.update_job(title="1")

        assert self.job.job['title'] == "1"

    @patch("app.email_job.Email.send_job_alert")
    def test_send_alert(self, mock):
        self.job.send_alert()

        assert self.job.job['email'] != "N"
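The tests above pin down most of this Job's interface: it wraps a record dict on .job, put_job serializes that dict into the data kwarg of requests.put (per the @patch target, requests is used inside app.job), update_job overwrites fields, and send_alert flips the email flag. A sketch consistent with those assertions; the endpoint URL and the Email wiring are assumptions:

import json

import requests

from app.email_job import Email  # module name taken from the @patch target

API_URL = "https://example.com/jobs"  # hypothetical endpoint


class Job:
    """Wraps one job record (a dict) and syncs it with a REST API."""

    def __init__(self, job):
        self.job = job

    def update_job(self, **kwargs):
        # Overwrite any supplied fields, e.g. update_job(title="1")
        self.job.update(kwargs)

    def put_job(self):
        # The test asserts the record lands JSON-encoded in the `data` kwarg
        requests.put(f"{API_URL}/{self.job['id']}", data=json.dumps(self.job))

    def send_alert(self):
        Email().send_job_alert(self.job)
        self.update_job(email="Y")  # the test only checks it is no longer "N"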
    print("  LIMIT:", LIMIT)
    print("  BATCH_SIZE:", BATCH_SIZE)
    print("  DESTRUCTIVE:", DESTRUCTIVE)

    #print("  GRAPH_LIMIT:", GRAPH_LIMIT)
    print("  GRAPH_BATCH_SIZE:", GRAPH_BATCH_SIZE)
    print("  GRAPH_DESTRUCTIVE:", GRAPH_DESTRUCTIVE)

    print("------------------------")
    storage = FileStorage(
        dirpath=f"daily_active_friend_graphs_v4/{DATE}/tweet_min/{TWEET_MIN}")
    tweets_csv_filepath = os.path.join(storage.local_dirpath, "tweets.csv")

    bq_service = BigQueryService()
    job = Job()

    #
    # LOAD TWEETS
    # tweet_id, text, screen_name, bot, created_at

    # TODO: de-dup RTs so the model will only train/test on a single RT status text (PREVENT OVERFITTING)
    if os.path.exists(tweets_csv_filepath) and not DESTRUCTIVE:
        print("LOADING TWEETS...")
        statuses_df = read_csv(tweets_csv_filepath)
    else:
        job.start()
        print("DOWNLOADING TWEETS...")
        statuses = []
        for row in bq_service.fetch_daily_active_tweeter_statuses(
                date=DATE, tweet_min=TWEET_MIN, limit=LIMIT):
Example 8
import signal
import time
from datetime import timedelta
from app.job import Job, ProgramKilled, signal_handler
from app.on_watch_file import OnWatchFile
from app.settings import get_logger

logger = get_logger('Main Watcher File')

if __name__ == "__main__":

    logger.info('Start Watcher File')
    watch = OnWatchFile()
    watch.connect_directory_monitoring()
    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)

    job = Job(interval=timedelta(seconds=watch.wait_time_seconds_job), execute=watch.periodically)
    job.start()
    while True:
        try:
            time.sleep(1)
        except ProgramKilled:
            print("Program killed: running cleanup code")
            job.stop()
            break
Example 11
    return len(batch)


if __name__ == "__main__":

    print("-------------------")
    print("BASILICA EMBEDDER...")
    print("  MIN PARTITION VAL:", MIN_VAL)
    print("  MAX PARTITION VAL:", MAX_VAL)
    print("  LIMIT:", LIMIT)
    print("  BATCH SIZE:", BATCH_SIZE)

    bq_service = BigQueryService()
    bas_service = BasilicaService()
    job = Job()

    job.start()
    records = list(
        bq_service.fetch_basilica_embedless_partitioned_statuses(
            min_val=MIN_VAL, max_val=MAX_VAL, limit=LIMIT))
    job.counter = len(records)

    batches = list(split_into_batches(records, BATCH_SIZE))
    print("BATCHES:", len(batches))
    job.end()
    del records

    job.start()

    with ThreadPoolExecutor(max_workers=MAX_THREADS,
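The snippet cuts off mid-call, but split_into_batches is presumably a simple chunking generator along these lines:

def split_into_batches(records, batch_size):
    """Yields successive batch_size-sized slices of records."""
    for i in range(0, len(records), batch_size):
        yield records[i:i + batch_size]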