Example #1
0
 def update_job_status(self, job_id):
     """Recompute one job's progress and write it to the jobs table.

     The job record is looked up in ``self.jobs`` and its Spark state is
     fetched through ``self.status.getJobInfo``. Two cases follow:

     * The Spark job is no longer RUNNING and the record is still marked
       unfinished ("finished" equals "started"): one more chunk is counted
       as done, and "finished" is stamped with the current time once every
       chunk is done.
     * Otherwise: task counts are summed over the job's stages to estimate
       progress inside the current chunk.

     The resulting totals are logged and stored with an UPDATE statement.
     """
     job = self.jobs[job_id]
     info = self.status.getJobInfo(job["spark_id"])
     still_running = info.status == 'RUNNING'
     done_string = 'false' if still_running else 'true'
     # Computed before chunk_size can be filled in below, as in the stage scan.
     total_total = job["chunk_size"] * job["chunks"]

     if job["finished"] == job["started"] and not still_running:
         # The current chunk has just completed.
         job["chunks_done"] += 1
         if job["chunks_done"] == job["chunks"]:
             job["finished"] = datetime.now()
         total_done = job["chunk_size"] * job["chunks_done"]
     else:
         task_count = 0
         active_count = 0  # accumulated but not used in the totals below
         complete_count = 0
         for stage_id in info.stageIds:
             stage = self.status.getStageInfo(stage_id)
             if stage:
                 task_count += stage.numTasks
                 active_count += stage.numActiveTasks
                 complete_count += stage.numCompletedTasks
             # First time through, adopt the task count seen so far as the
             # chunk size.
             if job["chunks"] > 0 and job["chunk_size"] == 0:
                 job["chunk_size"] = task_count
         total_done = job["chunk_size"] * job["chunks_done"] + complete_count

     # Never report more finished tasks than the overall total.
     total_done = min(total_done, total_total)
     my_print("Setting job totals: %d %d %d %d %d" % (
         job["chunk_size"], job["chunks"], job["chunks_done"],
         total_total, total_done))

     sql = '''
         UPDATE jobs SET tasks_done=%d, tasks_total=%d, status='%s', done=%s, finish='%s'
         WHERE id=%d
         '''
     self.db.query(sql % (total_done, total_total, info.status, done_string,
                          str(job["finished"]), job_id))
Example #2
0
	def get_webpage_content(self, url):
		"""Fetch `url` and return its body as a string, or None on failure.

		The raw bytes are passed through ``str()`` rather than decoded, so on
		Python 3 the result is the bytes repr (``b'...'`` with backslash
		escapes). That form is kept unchanged for existing callers.

		Any exception while fetching is logged through ``my_print`` and
		reported as a None return value instead of being raised.
		"""
		try:
			req = urllib.request.Request(url)
			# The context manager closes the connection even if read() fails.
			with urllib.request.urlopen(req) as response:
				webpage_str = response.read()
			return str(webpage_str)
		except Exception:
			# Best-effort fetch. str() keeps the log call itself from raising
			# when `url` is not a string.
			my_print("error reading:" + str(url))
			return None
Example #3
0
 def update_all_jobs_callback(self):
     """Refresh every unfinished job, then schedule the next refresh.

     A job is treated as unfinished while its "finished" value still equals
     its "started" value. The callback re-registers itself on the tornado
     IOLoop with a 5 second delay, even if the refresh raised.
     """
     try:
         my_print("updating spark jobs status...")
         for job_id, job in self.jobs.iteritems():
             if job["finished"] != job["started"]:
                 continue
             self.update_job_status(job_id)
     finally:
         ioloop = tornado.ioloop.IOLoop.instance()
         ioloop.add_timeout(timedelta(seconds=5),
                            self.update_all_jobs_callback)
Example #4
0
def get_acc(param):
    """Sum the training cost over sequential batches and print it.

    `param` is accepted but not used. Returns False (after reporting an
    error) when there are fewer training inputs than
    ``config.training_segment``; otherwise returns None once the summed cost
    has been printed.
    """
    if len(data.training_input) < config.training_segment:
        print_error("training segment > data number")
        return False

    total_cost = 0.0
    # Batch offsets run from training_segment up to and including
    # training_input_len, stepping by the RNN batch size.
    batch_offsets = range(config.training_segment,
                          data.training_input_len + 1,
                          RNN.config.batch_num)
    for offset in batch_offsets:
        batch = data.training_input_sequential_selection(
            RNN.config.batch_num, offset)
        total_cost += RNN.train.cost(batch[0], batch[1])
    my_print("Cost", total_cost)
Example #5
0
 def train(self, X, Y,
           ordered_topics,
           balance='p',
           max_per_class=100):
     """Train each of the ``self.K`` per-topic SVMs in turn.

     SVM number k is trained on `X` against the single column k of `Y`
     (sliced as ``Y[:, k:k+1]``) and is given ``ordered_topics[k]`` as its
     topic. `balance` and `max_per_class` are forwarded unchanged. A running
     counter is printed after each SVM.
     """
     for idx in xrange(self.K):
         topic_name = ordered_topics[idx]
         column = Y[:, idx:idx + 1]
         self.svms[idx].train(
             X,
             column,
             balance=balance,
             max_per_class=max_per_class,
             topic=topic_name,
         )
         util.my_print('%d . . .' % (idx + 1), same_line=True)
Example #6
0
	def get_lastest_page_info(self, latest_webpage_str):
		"""Extract the headline figures from one page's text.

		Returns a dict with the keys 'date', 'positive_cases', 'death',
		'results received' and 'results pending'. Each value is the text
		captured by the first match of the corresponding pattern, or None
		when that pattern does not match.
		"""
		my_print("get page info")

		def first_capture(pattern):
			# Group 1 of the earliest match, or None when nothing matches.
			hit = re.search(pattern, latest_webpage_str)
			return hit.group(1) if hit else None

		report_date = first_capture(r"Date:\s+(.*),\s+2020<br/>")
		positives = first_capture(r"([,\d]+)[\\\d\w\s]+Positive cases")
		deaths = first_capture(r"([,\d]+)[\\\d\w\s]+Deaths")

		# One sentence carries both the received and the pending counts.
		results = re.search(r" least ([\+,\d]+) results have been received and another ([\+,\d]+) are pending", latest_webpage_str)
		if results:
			received, outstanding = results.group(1), results.group(2)
		else:
			received, outstanding = None, None

		return {
			'date': report_date,
			'positive_cases': positives,
			'death': deaths,
			'results received': received,
			'results pending': outstanding
		}
Example #7
0
def main():
    """Parse options, load the JSON config and run the tornado web server.

    Sets the module-level globals ``args`` and ``config``. The server
    listens on ``--port`` (default 8080) until a keyboard interrupt, at which
    point the HTTP server is stopped so the socket is released. The periodic
    job-status callback and the ``sc.stop()`` call only run when
    ``args.spark`` is true; no command-line flag in this function sets it, so
    it stays at its default of False unless changed elsewhere.
    """
    global args, config

    parser = argparse.ArgumentParser(description='IMS webserver.')
    parser.add_argument('--config', dest='config', type=str,
                        help='config file name')
    parser.add_argument('--port', dest='port', type=int,
                        help='port on which to access the web server')
    # Plain on/off switches, registered in the same order as before.
    switches = (
        ('--profile', 'time_profiling_enabled'),
        ('--use-deprecated', 'use_deprecated'),
        ('--debug', 'debug'),
    )
    for flag, dest_name in switches:
        parser.add_argument(flag, dest=dest_name, action='store_true')
    parser.set_defaults(spark=False,
                        config='config.json',
                        port=8080,
                        time_profiling_enabled=False,
                        use_deprecated=False)
    args = parser.parse_args()

    with open(args.config) as config_file:
        config = json.load(config_file)

    listen_port = args.port
    torn_app = Application(args.debug)
    http_server = tornado.httpserver.HTTPServer(torn_app)
    http_server.listen(listen_port)
    my_print("Starting server, listening to port %d..." % listen_port)
    try:
        ioloop = tornado.ioloop.IOLoop.instance()
        if args.spark:
            # First periodic job-status refresh, 5 seconds from now.
            ioloop.add_timeout(timedelta(seconds=5),
                               torn_app.update_all_jobs_callback)
        # Blocks until the loop is stopped or interrupted.
        ioloop.start()
    except KeyboardInterrupt:
        my_print('^C received, shutting down server')
        if args.spark:
            torn_app.sc.stop()
        http_server.stop()
Example #8
0
    def __init__(self, args, qry_file, printDet=False):
        """Load queries and thresholds from `qry_file` and build tensors.

        ``read_queries_and_thres(qry_file, 0, printDet)`` must return seven
        values; the last one is discarded. ``self.tensor_thres`` is built
        from the threshold set matching ``args.k`` (10, 100 or 1000) and is
        left unset for any other value of ``args.k``.
        """
        loaded = read_queries_and_thres(qry_file, 0, printDet)
        (self.queries, self.thres_10, self.thres_100, self.thres_1000,
         self.qids, self.qterms, _) = loaded
        self.tensor_queries = create_tensors_from_np(self.queries)
        # Number of terms in each query.
        self.qlens = [len(terms) for terms in self.qterms]

        if args.k == 10:
            self.tensor_thres = create_thresholds(self.thres_10)
        elif args.k == 100:
            self.tensor_thres = create_thresholds(self.thres_100)
        elif args.k == 1000:
            self.tensor_thres = create_thresholds(self.thres_1000)

        my_print("dataset statistics:", qry_file)
        my_print("\tqueries =", len(self.queries))
Example #9
0
def main():
    """Print the parsed figures for the ten most recent press-release pages.

    The index page at ``main_page_url`` is fetched and scanned for
    ``/Programs/OPA/Pages/NR20-<digits>.aspx`` links. The links are ordered
    with ``scrapper.get_latest_date`` as the comparison function, and the
    last ten (without duplicates) are fetched and summarised with
    ``get_lastest_page_info``. Pages that fail to load are skipped.
    """
    scrapper = Scrapper()

    index_page = scrapper.get_webpage_content(main_page_url)
    if index_page is None:
        # A failed fetch yields None, and re.findall would raise TypeError.
        my_print("could not load index page: " + str(main_page_url))
        return

    # The dot is escaped so that only a literal ".aspx" suffix matches.
    all_urls = re.findall(r"(/Programs/OPA/Pages/NR20-\d+\.aspx)", index_page)
    all_urls = sorted(all_urls, key=cmp_to_key(scrapper.get_latest_date))

    seen_urls = set()
    for url in all_urls[-10:]:
        if url in seen_urls:
            continue
        seen_urls.add(url)
        my_print(url)
        content = scrapper.get_webpage_content(website_url + url)
        if content:
            print(scrapper.get_lastest_page_info(content))
Example #10
0
def train(model, epoch, data, quant):
    """Run one pass of quantile-loss training over `data`.

    `data` is wrapped in a shuffled DataLoader using ``args.batch_size``. A
    new Adam optimizer is created on every call, with the learning rate taken
    from the module-level ``lr``. Gradients are clipped to ``args.clip``
    before each step. The progress bar shows the mean of the most recent 128
    batch losses. `epoch` is only used in the log line. Returns None.
    """
    loader = DataLoader(data, batch_size=args.batch_size, shuffle=True)
    loader_len = len(loader)

    # `quant` reshaped to a single-row float tensor on the target device.
    target_quantiles = torch.tensor([[quant]]).view(1, -1).float().to(
        args.device)

    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    my_print("epoch {} start training {} instances with lr {}".format(
        epoch, loader_len, lr))

    with tqdm(total=loader_len, unit='batches', desc='train') as pbar:
        loss_history = []
        for batch in loader:
            optimizer.zero_grad()
            queries, thres = batch
            scores = model(queries.to(args.device))
            batch_loss = quantile_loss(scores, thres.to(args.device),
                                       target_quantiles)
            loss_history.append(batch_loss.item())
            batch_loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
            optimizer.step()
            pbar.set_postfix(loss=np.mean(loss_history[-128:]))
            pbar.update(1)
        pbar.close()
Example #11
0
                    help='quantiles',
                    type=float,
                    required=True)
parser.add_argument('--device', type=str, default="cpu", required=False,
                    help='compute device')
parser.add_argument('--debug', dest='debug', action='store_true',
                    default=False)
args = parser.parse_args()
init_log(args)
torch.set_num_threads(hyperparams.default_threads)

# Log every parsed option, ordered by option name.
my_print("Parameters:")
for option, setting in sorted(vars(args).items()):
    my_print("\t{0}: {1}".format(option, setting))

# Set the random seed manually for reproducibility.
torch.manual_seed(hyperparams.random_seed)

###############################################################################
# Load data
###############################################################################
# Device selection only happens when CUDA is available: the default "cpu"
# value is replaced by the default CUDA device, while any other value is
# expected to look like "<type>:<index>" and selects that device index.
if torch.cuda.is_available():
    if args.device == "cpu":
        args.device = torch.device('cuda')
    else:
        torch.cuda.set_device(int(args.device.split(":")[1]))
        args.device = torch.device(args.device)
Example #12
0
def print_status(param):
    """Print the test-bench and/or RNN configuration.

    With an empty `param` both sections are printed. Otherwise only the
    section named by ``param[0]`` ("testbench" or "rnn") is printed. The
    closing rule is always printed.
    """
    if len(param) == 0 or param[0] == "testbench":
        print("========== TestBench Status ==========")
        my_print("Train input file", tc.train_input_file)
        my_print("Train answer file", tc.train_answer_file)
        my_print("Test input file", tc.test_input_file)
        # This line used to carry the "Train input file" label by mistake.
        my_print("Test output file", tc.test_output_file)

    if len(param) == 0 or param[0] == "rnn":
        print("========== RNN Status ================")
        my_print("Input dimension", rc.input_dim)
        my_print("Output dimension", rc.output_dim)
        my_print("Hidden layer list", rc.hidden_layer_dim_list)
        my_print("Batch size", rc.batch_num)
        my_print("Learning rate", rc.learning_rate)

    print("======================================")