Ejemplo n.º 1
0
def image_process_thread(url, client_key, queue, results,
                         lock_stats, counter, avg_req_time, time_start, time_end):
    """Worker loop that sends queued image entries to the service.

    Entries are taken from ``queue`` without blocking; when none is available
    the worker sleeps for one second and polls again. Every entry is indexed
    with the keys ``'id'``, ``'url'`` and ``'model'``. An entry whose ``'url'``
    equals ``'shutdown'`` ends the loop.

    :param url: passed to ``service`` as ``url``
    :param client_key: stored in the request parameters as ``client_key``
    :param queue: source of entries; rate-limited entries are put back on it
    :param results: receives ``dict(id=..., model=..., result=...)`` for every
        HTTP 200 response
    :param lock_stats: lock held while the shared statistics are updated
    :param counter: shared value, incremented once per successful request
    :param avg_req_time: shared value; the elapsed time of every successful
        request is added to it (a running sum, not a mean)
    :param time_start: shared value, lowered to the earliest request start seen
    :param time_end: shared value, raised to the latest request end seen
    """
    while True:
        # get image URL entry to process
        entry = None
        try:
            entry = queue.get(block=False)
        except Exception:
            # Any failure to fetch (normally: the queue is empty) means there
            # is nothing to do right now. KeyboardInterrupt and SystemExit
            # are deliberately not caught so the worker can be stopped.
            pass
        if not entry:
            time.sleep(1)
            continue

        image_id = entry['id']
        img_url = entry['url']
        model_id = entry['model']
        if img_url == 'shutdown':
            print('thread shutting down')
            break
        params = __PARAMS.copy()  # note the module variables as defined in restb/sdk/__init__.py
        params['client_key'] = client_key
        params['image_url'] = img_url
        params['model_id'] = model_id
        endpoint = __MODELS[model_id]
        start_time = now_millis()
        resp = service(url=url, endpoint=endpoint, params=params)
        end_time = now_millis()
        msg = '[{http}] <{limit}> thread [{thread}] {msg}'
        if resp.status_code == 200:
            vals = json.loads(resp.text)
            results.put(dict(id=image_id, model=model_id, result=vals['response']))
            total = end_time - start_time
            print(msg.format(
                http=resp.status_code,
                limit=resp.headers['X-RateLimit-Remaining-second'],
                thread=mp.current_process().name,
                msg='processed request in [' + str(total) + '] ms')
            )
            # update the shared statistics; the context manager releases the
            # lock even if one of the updates raises
            with lock_stats:
                counter.value += 1
                avg_req_time.value += total
                if start_time < time_start.value:
                    time_start.value = start_time
                if end_time > time_end.value:
                    time_end.value = end_time
        elif resp.status_code == 429:
            # handle over-rate limit retrying
            print(msg.format(
                http=resp.status_code,
                limit=resp.headers['X-RateLimit-Remaining-second'],
                thread=mp.current_process().name,
                msg='surpassed rate limit, trying again')
            )
            # re-queue entry and try again, then sleep for ideal average time between requests
            queue.put(entry)
            time.sleep(1 / float(__requests_per_second))
        else:
            # Neither success nor rate limiting: the entry is not retried and
            # produces no result, so at least leave a trace of it.
            print(msg.format(
                http=resp.status_code,
                limit=resp.headers.get('X-RateLimit-Remaining-second'),
                thread=mp.current_process().name,
                msg='unexpected response, entry dropped')
            )
Ejemplo n.º 2
0
    def _upload(num_retries):
        """Upload one chunk as a part of the multipart upload ``multipart_id``.

        ``offset``, ``bytes``, ``filename``, ``bucket_name``, ``multipart_id``
        and the connection settings are taken from the enclosing scope. The
        part number sent to the server is ``offset + 1``.

        :param num_retries: how many more attempts are made after a failure
        :raises Exception: the last failure, once no retries remain
        """
        worker_name = multiprocessing.current_process().name
        log.Debug("%s: Uploading chunk %d" % (worker_name, offset + 1))
        try:
            conn = get_connection(scheme, parsed_url, storage_uri)
            try:
                bucket = conn.lookup(bucket_name)

                for mp in bucket.list_multipart_uploads():
                    if mp.id == multipart_id:
                        with FileChunkIO(filename, 'r', offset=offset * bytes, bytes=bytes) as fd:
                            start = time.time()
                            # Max num of callbacks = 8 times x megabyte.
                            # Floor division keeps num_cb an integer.
                            mp.upload_part_from_file(fd, offset + 1, cb=_upload_callback,
                                                     num_cb=max(2, 8 * bytes // (1024 * 1024)))
                            end = time.time()
                            log.Debug(("{name}: Uploaded chunk {chunk} "
                                       "at roughly {speed} bytes/second").format(
                                           name=worker_name,
                                           chunk=offset + 1,
                                           speed=(bytes / max(1, abs(end - start)))))
                        break
            finally:
                # Close after a failed attempt as well, so that retries do
                # not accumulate open connections.
                conn.close()
        except Exception:
            traceback.print_exc()
            if num_retries:
                log.Debug("%s: Upload of chunk %d failed. Retrying %d more times..." % (
                    worker_name, offset + 1, num_retries - 1))
                return _upload(num_retries - 1)
            log.Debug("%s: Upload of chunk %d failed. Aborting..." % (
                worker_name, offset + 1))
            raise
        log.Debug("%s: Upload of chunk %d complete" % (worker_name, offset + 1))
Ejemplo n.º 3
0
    def _upload(num_retries):
        """Upload one chunk as a part of the multipart upload ``multipart_id``.

        ``offset``, ``bytes``, ``filename``, ``bucket_name``, ``multipart_id``
        and the connection settings are taken from the enclosing scope. The
        part number sent to the server is ``offset + 1``.

        :param num_retries: how many more attempts are made after a failure
        :raises Exception: the last failure, once no retries remain
        """
        worker_name = multiprocessing.current_process().name
        log.Debug("%s: Uploading chunk %d" % (worker_name, offset + 1))
        try:
            conn = get_connection(scheme, parsed_url)
            bucket = conn.lookup(bucket_name)

            for mp in bucket.get_all_multipart_uploads():
                if mp.id == multipart_id:
                    with FileChunkIO(filename,
                                     'r',
                                     offset=offset * bytes,
                                     bytes=bytes) as fd:
                        # Max num of callbacks = 8 times x megabyte.
                        # Floor division keeps num_cb an integer on
                        # Python 2 and Python 3 alike.
                        mp.upload_part_from_file(
                            fd,
                            offset + 1,
                            cb=_upload_callback,
                            num_cb=max(2, 8 * bytes // (1024 * 1024))
                        )
                    break
        except Exception:
            traceback.print_exc()
            if num_retries:
                log.Debug(
                    "%s: Upload of chunk %d failed. Retrying %d more times..."
                    % (worker_name, offset + 1, num_retries - 1))
                return _upload(num_retries - 1)
            log.Debug("%s: Upload of chunk %d failed. Aborting..." %
                      (worker_name, offset + 1))
            # Bare raise keeps the original traceback.
            raise
Ejemplo n.º 4
0
def run_xfers(hostname, port, xfer_count, account_ids, success_ctr, retry_ctr):
    """Repeatedly move 5 units of balance between two accounts.

    Each iteration picks an account pair, loads both balances in a new
    transaction, adjusts them by +5 / -5 and writes them back. The loop ends
    once the shared success counter has reached ``xfer_count``.

    :param hostname: host given to ``client.DgraphClient``
    :param port: port given to ``client.DgraphClient``
    :param xfer_count: value of the shared success counter at which to stop
    :param account_ids: passed to ``select_account_pair``
    :param success_ctr: shared counter (with ``get_lock()``) of successful writes
    :param retry_ctr: shared counter (with ``get_lock()``) of failed writes
    """
    pname = mpd.current_process().name
    log = logging.getLogger('test_bank.run_txfers[%s]' % (pname, ))
    c = client.DgraphClient(hostname, port)

    while True:
        from_acc, to_acc = select_account_pair(account_ids)
        query = """{{
            me(func: uid("{uid1:s}", "{uid2:s}")) {{
                uid,
                bal
            }}
        }}""".format(uid1=from_acc, uid2=to_acc)
        txn = c.txn()
        accounts = load_from_query(txn, query, 'me')
        accounts[0]['bal'] += 5
        accounts[1]['bal'] -= 5
        try:
            dump_from_obj(txn, accounts)
            with success_ctr.get_lock():
                success_ctr.value += 1
                # Snapshot under the lock so the progress line below reports
                # the value produced by this very increment.
                runs = success_ctr.value

            if not runs % 100:
                log.info('Runs %d. Aborts: %d', runs, retry_ctr.value)
            if success_ctr.value >= xfer_count:
                break
        except grpc._channel._Rendezvous as e:
            # logging.warn is a deprecated alias of logging.warning.
            logging.warning(e)
            with retry_ctr.get_lock():
                retry_ctr.value += 1
Ejemplo n.º 5
0
    def _monitorJob(self, ftsJob):
        """Monitor one FTS job and store what was learned in the database.

        In order: obtain an FTS3 context, call ``ftsJob.monitor``, update the
        file statuses, update the job status. The first step whose result is
        not 'OK' is logged and its result is returned straight away.

        :param ftsJob: the job to monitor

        :return: ftsJob, result of the last step performed
        """
        # Catch-all: any exception is converted into an error result
        try:
            workerName = current_process().name
            log = gLogger.getSubLogger("_monitorJob/%s" % ftsJob.jobID,
                                       child=True)

            ctxRes = self.getFTS3Context(ftsJob.username,
                                         ftsJob.userGroup,
                                         ftsJob.ftsServer,
                                         threadID=workerName)
            if not ctxRes['OK']:
                log.error("Error getting context", ctxRes)
                return ftsJob, ctxRes

            monitorRes = ftsJob.monitor(context=ctxRes['Value'])
            if not monitorRes['OK']:
                log.error("Error monitoring job", monitorRes)
                return ftsJob, monitorRes

            # monitorRes['Value'] is { fileID : { Status, Error } }.
            # The ftsGUID is given so that files already taken over by a
            # newer job do not get their status overwritten.
            fileRes = self.fts3db.updateFileStatus(monitorRes['Value'],
                                                   ftsGUID=ftsJob.ftsGUID)
            if not fileRes['OK']:
                log.error("Error updating file fts status",
                          "%s, %s" % (ftsJob.ftsGUID, fileRes))
                return ftsJob, fileRes

            jobFields = {
                'status': ftsJob.status,
                'error': ftsJob.error,
                'completeness': ftsJob.completeness,
                'operationID': ftsJob.operationID,
                'lastMonitor': True,
            }
            jobRes = self.fts3db.updateJobStatus({ftsJob.jobID: jobFields})

            if ftsJob.status in ftsJob.FINAL_STATES:
                self.__sendAccounting(ftsJob)

            return ftsJob, jobRes

        except Exception as e:
            return ftsJob, S_ERROR(0, "Exception %s" % repr(e))
Ejemplo n.º 6
0
def run_transfers(addr, transfer_count, account_ids, success_ctr, retry_ctr):
    """Repeatedly move 5 units of balance between two accounts.

    Each iteration picks an account pair, loads both balances in a new
    transaction, adjusts them by +5 / -5 and writes them back. A failing
    iteration is counted in ``retry_ctr`` and the loop carries on; it ends
    once the shared success counter has reached ``transfer_count``.

    :param addr: address given to ``helper.create_client``
    :param transfer_count: value of the shared success counter at which to stop
    :param account_ids: passed to ``select_account_pair``
    :param success_ctr: shared counter (with ``get_lock()``) of successful runs
    :param retry_ctr: shared counter (with ``get_lock()``) of failed runs
    """
    pname = mpd.current_process().name
    log = logging.getLogger('test_bank.run_transfers[%s]' % (pname, ))
    c = helper.create_client(addr)

    while True:
        from_acc, to_acc = select_account_pair(account_ids)
        query = """{{
            me(func: uid("{uid1:s}", "{uid2:s}")) {{
                uid,
                bal
            }}
        }}""".format(uid1=from_acc, uid2=to_acc)

        txn = c.txn()
        try:
            accounts = load_from_query(txn, query, 'me')
            accounts[0]['bal'] += 5
            accounts[1]['bal'] -= 5
            dump_from_obj(txn, accounts)
            with success_ctr.get_lock():
                success_ctr.value += 1
                # Snapshot under the lock so the progress line below reports
                # the value produced by this very increment.
                runs = success_ctr.value

            if not runs % 100:
                log.info('Runs %d. Aborts: %d', runs, retry_ctr.value)
            if success_ctr.value >= transfer_count:
                break
        except Exception:
            # Not a bare except: KeyboardInterrupt and SystemExit must be able
            # to end this otherwise endless loop.
            with retry_ctr.get_lock():
                retry_ctr.value += 1

    with success_ctr.get_lock(), retry_ctr.get_lock():
        log.info('success: %d, retries: %d', success_ctr.value,
                 retry_ctr.value)
Ejemplo n.º 7
0
    def enhance_car_db(self, itemid):
        """Complete the database entry of one car: page data and photos.

        * If the entry has an 'href' but no 'parsed_car' key, the page is
          loaded in this worker's selenium driver, a screenshot is saved and
          the entry is replaced by the result of ``parse_car_properties``.
        * If the entry has an 'Images_URL' but no 'images_downloaded' key,
          the archive is downloaded and the entry is flagged.

        :param itemid: key of the car in ``self.cars_db``
        """
        # The worker number is extracted from the text form of the current
        # process object, which has to contain "Thread-<n>, ".
        thread_id = int(
            str(current_process()).split("Thread-")[1].split(", ")[0])
        log.info("Worker thread number " + str(thread_id))

        # One selenium driver per worker, created on first use.
        if thread_id not in self.driver:
            try:
                self.driver[thread_id] = webdriver.Firefox(
                    executable_path=self.geckodriver_path,
                    firefox_profile=self.selenium_profile)
                self.driver[thread_id].set_page_load_timeout(
                    self.selenium_page_timeout)
                log.debug(
                    "Creating selenium driver for worker thread number " +
                    str(thread_id))
            except Exception as error:
                log.error(
                    "Can't create selenium driver for worker thread number " +
                    str(thread_id))
                log.error(str(error))

        # NOTE(review): this counter lives only for one call, so it is 0 or 1
        # when compared with autosave_period below - confirm that is intended.
        autosave = 0

        if 'parsed_car' not in self.cars_db[itemid] and 'href' in self.cars_db[
                itemid]:
            if thread_id in self.driver:
                log.debug("Parsing car: " + itemid)
                self.driver[thread_id].get(self.cars_db[itemid]['href'])
                self.driver[thread_id].get_screenshot_as_file(
                    self.screenshots_path + self.cars_db_filename + "\\" +
                    itemid + ".png")
                self.cars_db[itemid] = self.parse_car_properties(
                    self.cars_db[itemid],
                    BeautifulSoup(self.driver[thread_id].page_source,
                                  features="lxml"))
                autosave += 1
            else:
                # Driver creation failed above (already logged); skip the
                # page instead of failing with an unrelated KeyError.
                log.error(
                    "No selenium driver for worker thread number " +
                    str(thread_id) + ", skipping parsing of: " + itemid)
        elif 'href' not in self.cars_db[itemid]:
            log.debug("Missing url: " + itemid)
        else:
            log.debug("Already parsed: " + itemid)

        if 'Images_URL' in self.cars_db[
                itemid] and 'images_downloaded' not in self.cars_db[itemid]:
            self.download_file(
                self.cars_db[itemid]['Images_URL'], self.photos_path +
                self.cars_db_filename + "\\" + itemid + ".zip")
            self.cars_db[itemid]['images_downloaded'] = "1"
        else:
            log.debug("Photos already downloaded for: " + itemid)

        if autosave >= self.autosave_period:
            autosave = 0
            self.write_cars_db()
Ejemplo n.º 8
0
  def _monitorJob(self, ftsJob):
    """Monitor one FTS job and store what was learned in the database.

    In order: obtain an FTS3 context, call ``ftsJob.monitor``, update the
    file statuses, update the job status. The first step whose result is
    not 'OK' is logged and its result is returned straight away.

    :param ftsJob: the job to monitor

    :return: ftsJob, result of the last step performed
    """
    # Catch-all: any exception is converted into an error result
    try:
      workerName = current_process().name
      log = gLogger.getSubLogger("_monitorJob/%s" % ftsJob.jobID, child=True)

      ctxRes = self.getFTS3Context(
          ftsJob.username, ftsJob.userGroup, ftsJob.ftsServer, threadID=workerName)
      if not ctxRes['OK']:
        log.error("Error getting context", ctxRes)
        return ftsJob, ctxRes

      monitorRes = ftsJob.monitor(context=ctxRes['Value'])
      if not monitorRes['OK']:
        log.error("Error monitoring job", monitorRes)
        return ftsJob, monitorRes

      # monitorRes['Value'] is { fileID : { Status, Error } }.
      # The ftsGUID is given so that files already taken over by a
      # newer job do not get their status overwritten.
      fileRes = self.fts3db.updateFileStatus(monitorRes['Value'], ftsGUID=ftsJob.ftsGUID)
      if not fileRes['OK']:
        log.error("Error updating file fts status", "%s, %s" % (ftsJob.ftsGUID, fileRes))
        return ftsJob, fileRes

      jobFields = {
          'status': ftsJob.status,
          'error': ftsJob.error,
          'completeness': ftsJob.completeness,
          'operationID': ftsJob.operationID,
          'lastMonitor': True,
      }
      jobRes = self.fts3db.updateJobStatus({ftsJob.jobID: jobFields})

      if ftsJob.status in ftsJob.FINAL_STATES:
        self.__sendAccounting(ftsJob)

      return ftsJob, jobRes

    except Exception as e:
      return ftsJob, S_ERROR(0, "Exception %s" % repr(e))
Ejemplo n.º 9
0
def upload_file_worker(ctx, asset_id, asset_type, filepath, chunk_size):
  """Upload a given file to an asset in its own thread as
  part of upload_files_multithreaded().

  Parameters
  ----------
  ctx : Context
    A Click Context object.
  asset_id : str
    The Id of a valid raster or vector asset.
  asset_type : int
    A GME asset type defined by the Asset class.
  filepath : str
    The absolute path to the file.
  chunk_size : int
    The size of each upload chunk (must be a multiple of 256KB), or -1
    for native Python streaming. This function defines no default; the
    caller always supplies the value.
  """
  # Kept after the docstring so that the string above really is the
  # function's docstring.
  print("upload_file_worker %s" % (filepath))

  @retries(1000)
  def next_chunk(ctx, request):
    return request.next_chunk()

  ctx.log("Begun uploading %s" % (os.path.basename(filepath)))
  start_time = time.time()

  # Fall back to a generic mimetype when none is reported for the file.
  media = MediaFileUpload(filepath, chunksize=chunk_size, resumable=True)
  if not media.mimetype():
    media = MediaFileUpload(filepath, mimetype='application/octet-stream', chunksize=chunk_size, resumable=True)

  resource = get_asset_resource(ctx.service(ident=current_process().ident), asset_type)
  request = resource.files().insert(id=asset_id, filename=os.path.basename(filepath), media_body=media)
  response = None
  while response is None:
    try:
      progress, response = next_chunk(ctx, request)
    except NoContent:
      # Files uploads return a 204 No Content "error" that actually means it's finished successfully.
      response = ""

  ctx.log("Finished uploading %s (%s mins)" % (os.path.basename(filepath), round((time.time() - start_time) / 60, 2)))
Ejemplo n.º 10
0
    def _upload(num_retries):
        """Upload one chunk as a part of the multipart upload ``multipart_id``.

        ``offset``, ``bytes``, ``filename``, ``bucket_name``, ``multipart_id``
        and the connection settings are taken from the enclosing scope. The
        part number sent to the server is ``offset + 1``.

        :param num_retries: how many more attempts are made after a failure
        :raises Exception: the last failure, once no retries remain
        """
        worker_name = multiprocessing.current_process().name
        log.Debug("%s: Uploading chunk %d" % (worker_name, offset + 1))
        try:
            conn = get_connection(scheme, parsed_url)
            bucket = conn.lookup(bucket_name)

            for mp in bucket.get_all_multipart_uploads():
                if mp.id == multipart_id:
                    with FileChunkIO(filename, 'r', offset=offset * bytes, bytes=bytes) as fd:
                        mp.upload_part_from_file(fd, offset + 1, cb=_upload_callback)
                    break
        except Exception:
            traceback.print_exc()
            if num_retries:
                log.Debug("%s: Upload of chunk %d failed. Retrying %d more times..." % (
                    worker_name, offset + 1, num_retries - 1))
                return _upload(num_retries - 1)
            log.Debug("%s: Upload of chunk %d failed. Aborting..." % (
                worker_name, offset + 1))
            # Bare raise keeps the original traceback.
            raise
Ejemplo n.º 11
0
    def start(self):
        """
        Start the underlying thread, flagging it as started just before.

        DaemonProcess.start() has a race condition that showed up in some
        test scans: the Pool was designed for real processes but is used
        with Threads, so there is no worker.exitcode and it has to be
        simulated, which is race-prone.

        This override exists only to set

            self._start_called = True

        as late as possible, immediately before the call to .start(), which
        should reduce the chances of hitting the race by 1% ;-)
        """
        parent = self._parent
        assert parent is current_process()

        # Register with the parent when it keeps track of its children
        if hasattr(parent, '_children'):
            parent._children[self] = None

        self._start_called = True
        threading.Thread.start(self)
Ejemplo n.º 12
0
    def start(self):
        """
        Start the underlying thread, flagging it as started just before.

        DaemonProcess.start() has a race condition that showed up in some
        test scans: the Pool was designed for real processes but is used
        with Threads, so there is no worker.exitcode and it has to be
        simulated, which is race-prone.

        This override exists only to set

            self._start_called = True

        as late as possible, immediately before the call to .start(), which
        should reduce the chances of hitting the race by 1% ;-)
        """
        parent = self._parent
        assert parent is current_process()

        # Register with the parent when it keeps track of its children
        if hasattr(parent, '_children'):
            parent._children[self] = None

        self._start_called = True
        threading.Thread.start(self)
Ejemplo n.º 13
0
    def _treatOperation(self, operation):
        """ Treat one operation:
          * does the callback if the operation is finished
          * otherwise generates new jobs and submits them, unless the
            associated RMS Request is 'Canceled'
          * persists the operation in the DB

        :param operation: the operation to treat

        :return: operation, S_OK()/S_ERROR()
        """
        # NOTE(review): if an exception is raised before `log` is assigned,
        # the `log.exception` call in the handler below fails as well.
        try:
            # The process name is used as threadID when getting the FTS3 context
            threadID = current_process().name
            log = gLogger.getSubLogger("treatOperation/%s" %
                                       operation.operationID,
                                       child=True)

            # If the operation is totally processed
            # we perform the callback
            if operation.isTotallyProcessed():
                log.debug("FTS3Operation %s is totally processed" %
                          operation.operationID)
                res = operation.callback()

                if not res['OK']:
                    log.error("Error performing the callback", res)
                    log.info("Putting back the operation")
                    dbRes = self.fts3db.persistOperation(operation)

                    if not dbRes['OK']:
                        log.error("Could not persist operation", dbRes)

                    # The callback error is returned, not the persistence result
                    return operation, res

            else:
                log.debug("FTS3Operation %s is not totally processed yet" %
                          operation.operationID)

                # This flag is set to False if we want to stop the ongoing processing
                # of an operation, typically when the matching RMS Request has been
                # canceled (see below)
                continueOperationProcessing = True

                # Check the status of the associated RMS Request.
                # If it is canceled then we will not create new FTS3Jobs, and mark
                # this as FTS3Operation canceled.

                if operation.rmsReqID:
                    res = ReqClient().getRequestStatus(operation.rmsReqID)
                    if not res['OK']:
                        log.error("Could not get request status", res)
                        return operation, res
                    rmsReqStatus = res['Value']

                    if rmsReqStatus == 'Canceled':
                        log.info(
                            "The RMS Request is canceled, canceling the FTS3Operation",
                            "rmsReqID: %s, FTS3OperationID: %s" %
                            (operation.rmsReqID, operation.operationID))
                        operation.status = 'Canceled'
                        continueOperationProcessing = False

                if continueOperationProcessing:
                    res = operation.prepareNewJobs(
                        maxFilesPerJob=self.maxFilesPerJob,
                        maxAttemptsPerFile=self.maxAttemptsPerFile)

                    if not res['OK']:
                        log.error(
                            "Cannot prepare new Jobs",
                            "FTS3Operation %s : %s" %
                            (operation.operationID, res))
                        return operation, res

                    newJobs = res['Value']

                    log.debug("FTS3Operation %s: %s new jobs to be submitted" %
                              (operation.operationID, len(newJobs)))

                    # A failure for one job is logged and that job is skipped;
                    # the remaining jobs are still attempted
                    for ftsJob in newJobs:
                        res = self._serverPolicy.chooseFTS3Server()
                        if not res['OK']:
                            log.error(res)
                            continue

                        ftsServer = res['Value']
                        log.debug("Use %s server" % ftsServer)

                        ftsJob.ftsServer = ftsServer

                        res = self.getFTS3Context(ftsJob.username,
                                                  ftsJob.userGroup,
                                                  ftsServer,
                                                  threadID=threadID)

                        if not res['OK']:
                            log.error("Could not get context", res)
                            continue

                        context = res['Value']
                        res = ftsJob.submit(context=context,
                                            protocols=self.thirdPartyProtocols)

                        if not res['OK']:
                            log.error(
                                "Could not submit FTS3Job",
                                "FTS3Operation %s : %s" %
                                (operation.operationID, res))
                            continue

                        # Only successfully submitted jobs are attached to the operation
                        operation.ftsJobs.append(ftsJob)

                        submittedFileIds = res['Value']
                        log.info(
                            "FTS3Operation %s: Submitted job for %s transfers"
                            % (operation.operationID, len(submittedFileIds)))

            # Reached on both branches above;
            # new jobs are put in the DB at the same time
            res = self.fts3db.persistOperation(operation)

            if not res['OK']:
                log.error("Could not persist operation", res)

            return operation, res

        except Exception as e:
            log.exception('Exception in the thread', repr(e))
            return operation, S_ERROR("Exception %s" % repr(e))
Ejemplo n.º 14
0
  def _treatOperation(self, operation):
    """ Treat one operation:
          * does the callback if the operation is finished
          * otherwise generates new jobs and submits them
          * persists the operation in the DB

          :param operation: the operation to treat

          :return: operation, result of the last step performed
    """
    # NOTE(review): if an exception is raised before `log` is assigned,
    # the `log.exception` call in the handler below fails as well.
    try:
      # The process name is used as threadID when getting the FTS3 context
      threadID = current_process().name
      log = gLogger.getSubLogger("treatOperation/%s" % operation.operationID, child=True)

      # If the operation is totally processed
      # we perform the callback
      if operation.isTotallyProcessed():
        log.debug("FTS3Operation %s is totally processed" % operation.operationID)
        res = operation.callback()

        if not res['OK']:
          log.error("Error performing the callback", res)
          log.info("Putting back the operation")
          dbRes = self.fts3db.persistOperation(operation)

          if not dbRes['OK']:
            log.error("Could not persist operation", dbRes)

          # The callback error is returned, not the persistence result
          return operation, res

      else:
        log.debug("FTS3Operation %s is not totally processed yet" % operation.operationID)

        res = operation.prepareNewJobs(
            maxFilesPerJob=self.maxFilesPerJob, maxAttemptsPerFile=self.maxAttemptsPerFile)

        if not res['OK']:
          log.error("Cannot prepare new Jobs", "FTS3Operation %s : %s" %
                    (operation.operationID, res))
          return operation, res

        newJobs = res['Value']

        log.debug("FTS3Operation %s: %s new jobs to be submitted" %
                  (operation.operationID, len(newJobs)))

        # A failure for one job is logged and that job is skipped;
        # the remaining jobs are still attempted
        for ftsJob in newJobs:
          res = self._serverPolicy.chooseFTS3Server()
          if not res['OK']:
            log.error(res)
            continue

          ftsServer = res['Value']
          log.debug("Use %s server" % ftsServer)

          ftsJob.ftsServer = ftsServer

          res = self.getFTS3Context(
              ftsJob.username, ftsJob.userGroup, ftsServer, threadID=threadID)

          if not res['OK']:
            log.error("Could not get context", res)
            continue

          context = res['Value']
          res = ftsJob.submit(context=context, protocols=self.thirdPartyProtocols)

          if not res['OK']:
            log.error("Could not submit FTS3Job", "FTS3Operation %s : %s" %
                      (operation.operationID, res))
            continue

          # Only successfully submitted jobs are attached to the operation
          operation.ftsJobs.append(ftsJob)

          submittedFileIds = res['Value']
          log.info("FTS3Operation %s: Submitted job for %s transfers" %
                   (operation.operationID, len(submittedFileIds)))

      # Reached on both branches above;
      # new jobs are put in the DB at the same time
      res = self.fts3db.persistOperation(operation)

      if not res['OK']:
        log.error("Could not persist operation", res)

      return operation, res

    except Exception as e:
      log.exception('Exception in the thread', repr(e))
      return operation, S_ERROR("Exception %s" % repr(e))
Ejemplo n.º 15
0
 def _upload_callback(uploaded, total):
     """Log upload progress and, when a queue is set, forward it there.

     :param uploaded: number of bytes uploaded so far
     :param total: total number of bytes to upload
     """
     progress = (multiprocessing.current_process().name, uploaded, total)
     log.Debug("%s: Uploaded %s/%s bytes" % progress)
     if queue is None:
         return
     # Push data to the consumer thread
     queue.put([uploaded, total])
Ejemplo n.º 16
0
 def name(self, person_name):
     """Print the current process name, then ten greetings one second apart.

     :param person_name: name inserted into every greeting line
     """
     # Parenthesised single-argument print behaves the same on Python 2 and 3.
     print(Threads.current_process().name)
     for i in range(10):
         time.sleep(1)
         print('hello %s %d' % (person_name, i))
Ejemplo n.º 17
0
 def _upload_callback(uploaded, total):
     """Log upload progress, prefixed with the current process name.

     :param uploaded: number of bytes uploaded so far
     :param total: total number of bytes to upload
     """
     progress = (multiprocessing.current_process().name, uploaded, total)
     log.Debug("%s: Uploaded %s/%s bytes" % progress)
Ejemplo n.º 18
0
 def _upload_callback(uploaded, total):
     """Log upload progress, prefixed with the current process name.

     :param uploaded: number of bytes uploaded so far
     :param total: total number of bytes to upload
     """
     progress = (multiprocessing.current_process().name, uploaded, total)
     log.Debug("%s: Uploaded %s/%s bytes" % progress)
Ejemplo n.º 19
0
 def name(self, person_name):
     """Print the current process name, then ten greetings one second apart.

     :param person_name: name inserted into every greeting line
     """
     # Parenthesised single-argument print behaves the same on Python 2 and 3.
     print(Threads.current_process().name)
     for i in range(10):
         time.sleep(1)
         print('hello %s %d' % (person_name, i))
Ejemplo n.º 20
0
def process_one_problem(problem, debug=False):
    """Download the C++ solutions listed for one problem and store them on disk.

    Walks the solution listing pages of the problem, filters the listed
    submissions, and writes each remaining source to
    ``source_codes/<contestId>/<index>/<submissionid>.cpp``. Files that already
    exist are not downloaded again.

    NOTE(review): this snippet uses Python 2 syntax (``print`` statement,
    ``except X, e``) and the outer ``try`` below has no visible
    ``except``/``finally`` clause -- the block appears to be truncated.

    :param problem: mapping read through its ``'contestId'`` and ``'index'`` keys
    :param debug: when true, a fixed user-agent slot (5) is used instead of one
        derived from the worker process name
    """
    global FAIL_COUNT
    # Back-off: once too many failures have accumulated, pause for a long time,
    # reset the counter and give up on this problem.
    if FAIL_COUNT > FAIL_COUNT_LIMIT:
        sys.stderr.write('FAIL_COUNT: {}\n'.format(FAIL_COUNT))
        time.sleep(60 * 20)  # wait 20 minutes
        FAIL_COUNT = 0
        return
    # Random delay of 1 to 4 seconds before doing any request.
    time.sleep(1 + random.random() * 3)
    # The number after the '-' in the process name, minus one, selects the
    # user agent; presumably names look like "<Something>-<N>" -- TODO confirm.
    pid = 5 if debug else int(current_process().name.split('-')[1]) - 1
    current_proxy = '127.0.0.1:24000'
    agent = USER_AGENTS[pid]
    try:
        with PRINT_LOCK:
            sys.stderr.write('processing problem {}{}\n'.format(problem['contestId'], problem['index']))
        # Fetch the first listing page to check for solutions and find the page count.
        r = get_data(SOLUTIONS_LINK.format(problem['contestId'], problem['index'], 1), is_api=False, proxy=current_proxy, agent=agent)
        tree = html.fromstring(r.text)

        if not tree.xpath('//a[@class="view-source"]'):
            raise Exception('no solutions found on page')

        # The last pagination link gives the number of pages; no links means one page.
        last_page_node = tree.xpath('//li/span[@class="page-index"]/a/text()')
        last_page = int(last_page_node[-1]) if last_page_node else 1

        task_id = '{}/{}'.format(problem['contestId'], problem['index'])
        print 'page_index {}'.format(task_id), last_page, get_page_indexes(last_page)

        for page_index in get_page_indexes(last_page):

            r = get_data(
                SOLUTIONS_LINK.format(problem['contestId'], problem['index'], page_index),
                is_api=False,
                proxy=current_proxy,
                agent=agent,
            )
            tree = html.fromstring(r.text)
            solutions = tree.xpath('//a[@class="view-source"]')

            for solution in solutions:
                # Stop early if the failure counter went over the limit in the meantime.
                if FAIL_COUNT > FAIL_COUNT_LIMIT:
                    sys.stderr.write('FAIL_COUNT: {}\n'.format(FAIL_COUNT))
                    return
                submissionid = solution.attrib.get('submissionid', None)  # solution.xpath('text()')
                try:
                    try:
                        # Title attribute of the link in the third cell, minus its last word.
                        title = solution.xpath('../../td[3]/a')[0].attrib['title'].rsplit(' ', 1)[0]
                    except IndexError, e:
                        # No link in the third cell: skip the row if it holds a <span>
                        # instead, otherwise propagate the error.
                        if solution.xpath('../../td[3]/span'):
                            continue  # team solution
                        else:
                            raise e
                    # Skip titles that map to a rank value below 5.
                    if TITLES_TO_RANK[title] < 5:
                        continue
                    # Only keep submissions whose language cell mentions C++.
                    lang = solution.xpath('../../td[5]/text()')[0]
                    if 'C++' not in lang:
                        continue

                    # src_code = download_ajax_source_code(problem['contestId'], problem['index'], submissionid)

                    filename = 'source_codes/{}/{}/{}.cpp'.format(
                        problem['contestId'],
                        problem['index'],
                        submissionid,
                    )
                    # Sanity check that the target path belongs to this problem.
                    if task_id not in filename:
                        raise Exception('{} not in {}'.format(task_id, filename))
                    # Already downloaded earlier: nothing to do.
                    if os.path.exists(filename):
                        continue
                    src_code = download_source_code(solution.attrib['href'], current_proxy, agent)
                    if not os.path.exists(os.path.dirname(filename)):
                        try:
                            os.makedirs(os.path.dirname(filename))
                        except OSError as exc:  # Guard against race condition
                            if exc.errno != errno.EEXIST:
                                raise
                    with open(filename, 'w') as out:
                        out.write(src_code.encode('utf-8'))
                except Exception, e:
                    # Per-submission failures are reported on stderr, together with a
                    # saved copy of the listing page, and the loop moves on.
                    with PRINT_LOCK:
                        html_filename = save_html_for_debug(r)
                        sys.stderr.write('submissionid {}, err: {}, filename: {}\n{}\n'.format(
                            submissionid,
                            e,
                            html_filename,
                            traceback.format_exc(),
                        ))
Ejemplo n.º 21
0
 def _upload_callback(uploaded, total):
     """Log this worker's upload progress and relay it to the queue, if one is set.

     :param uploaded: amount uploaded so far (reported as bytes in the log line)
     :param total: total amount to upload (reported as bytes in the log line)
     """
     worker_name = multiprocessing.current_process().name
     log.Debug("%s: Uploaded %s/%s bytes" % (worker_name, uploaded, total))
     # "is not None" is the idiomatic spelling of "not ... is None".
     if queue is not None:
         queue.put([uploaded, total])  # Push data to the consumer thread
Ejemplo n.º 22
0
    def _monitorJob(self, ftsJob):
        """Monitor one FTS job and propagate its state to the database.

        * query the FTS servers
        * update the FTSFile status
        * update the FTSJob status

        Any exception raised while doing so is caught, logged and turned into
        an error result, so that the calling thread does not die.

        :param ftsJob: FTS job

        :return: ftsJob, S_OK()/S_ERROR()
        """
        # Bind a logger before entering the try block: the except handler below
        # logs through ``log``, and would itself crash with UnboundLocalError if
        # the failure happened before the job-specific sub-logger was created.
        log = gLogger

        # General try/except to avoid that the thread dies
        try:
            threadID = current_process().name
            log = gLogger.getLocalSubLogger("_monitorJob/%s" % ftsJob.jobID)

            res = self.getFTS3Context(ftsJob.username, ftsJob.userGroup, ftsJob.ftsServer, threadID=threadID)

            if not res["OK"]:
                log.error("Error getting context", res)
                return ftsJob, res

            context = res["Value"]

            res = ftsJob.monitor(context=context)

            if not res["OK"]:
                log.error("Error monitoring job", res)

                # If the job was not found on the server, update the DB
                if cmpError(res, errno.ESRCH):
                    res = self.fts3db.cancelNonExistingJob(ftsJob.operationID, ftsJob.ftsGUID)

                return ftsJob, res

            # { fileID : { Status, Error } }
            filesStatus = res["Value"]

            # Specify the job ftsGUID to make sure we do not overwrite
            # status of files already taken by newer jobs
            res = self.fts3db.updateFileStatus(filesStatus, ftsGUID=ftsJob.ftsGUID)

            if not res["OK"]:
                log.error("Error updating file fts status", "%s, %s" % (ftsJob.ftsGUID, res))
                return ftsJob, res

            upDict = {
                ftsJob.jobID: {
                    "status": ftsJob.status,
                    "error": ftsJob.error,
                    "completeness": ftsJob.completeness,
                    "operationID": ftsJob.operationID,
                    "lastMonitor": True,
                }
            }
            res = self.fts3db.updateJobStatus(upDict)

            # Accounting is sent only once the job has reached a final state
            if ftsJob.status in ftsJob.FINAL_STATES:
                self.__sendAccounting(ftsJob)

            return ftsJob, res

        except Exception as e:
            log.exception("Exception while monitoring job", repr(e))
            return ftsJob, S_ERROR(0, "Exception %s" % repr(e))
Ejemplo n.º 23
0
  def _treatOperation(self, operation):
    """ Treat one operation:
          * does the callback if the operation is finished
          * generate new jobs and submits them

          The name of the current process is used as thread ID when
          requesting the FTS3 context.

          Any exception is caught, logged and returned as an error result.

          :param operation: the operation to treat

          :return: operation, S_OK()/S_ERROR()
    """
    # Bind a logger before entering the try block: the except handler below
    # logs through ``log``, and would itself crash with UnboundLocalError if
    # the failure happened before the operation-specific sub-logger was created.
    log = gLogger

    try:
      threadID = current_process().name
      log = gLogger.getSubLogger("treatOperation/%s" % operation.operationID, child=True)

      # If the operation is totally processed
      # we perform the callback
      if operation.isTotallyProcessed():
        log.debug("FTS3Operation %s is totally processed" % operation.operationID)
        res = operation.callback()

        if not res['OK']:
          log.error("Error performing the callback", res)
          log.info("Putting back the operation")
          dbRes = self.fts3db.persistOperation(operation)

          if not dbRes['OK']:
            log.error("Could not persist operation", dbRes)

          return operation, res

      else:
        log.debug("FTS3Operation %s is not totally processed yet" % operation.operationID)

        res = operation.prepareNewJobs(
            maxFilesPerJob=self.maxFilesPerJob, maxAttemptsPerFile=self.maxAttemptsPerFile)

        if not res['OK']:
          log.error("Cannot prepare new Jobs", "FTS3Operation %s : %s" %
                    (operation.operationID, res))
          return operation, res

        newJobs = res['Value']

        log.debug("FTS3Operation %s: %s new jobs to be submitted" %
                  (operation.operationID, len(newJobs)))

        # A job that cannot be prepared or submitted is skipped; the
        # remaining jobs are still attempted.
        for ftsJob in newJobs:
          res = self._serverPolicy.chooseFTS3Server()
          if not res['OK']:
            log.error(res)
            continue

          ftsServer = res['Value']
          log.debug("Use %s server" % ftsServer)

          ftsJob.ftsServer = ftsServer

          res = self.getFTS3Context(
              ftsJob.username, ftsJob.userGroup, ftsServer, threadID=threadID)

          if not res['OK']:
            log.error("Could not get context", res)
            continue

          context = res['Value']
          res = ftsJob.submit(context=context)

          if not res['OK']:
            log.error("Could not submit FTS3Job", "FTS3Operation %s : %s" %
                      (operation.operationID, res))
            continue

          operation.ftsJobs.append(ftsJob)

          submittedFileIds = res['Value']
          log.info("FTS3Operation %s: Submitted job for %s transfers" %
                   (operation.operationID, len(submittedFileIds)))

      # new jobs are put in the DB at the same time
      res = self.fts3db.persistOperation(operation)

      if not res['OK']:
        log.error("Could not persist operation", res)

      return operation, res

    except Exception as e:
      log.exception('Exception in the thread', repr(e))
      return operation, S_ERROR("Exception %s" % repr(e))
 def __DummyProcess_start_patch(self):
     """Replacement ``start`` for a dummy (thread-backed) process object.

     Pulled from an updated version of Python and modified to avoid further
     imports: everything is reached through ``__mp_dummy``.
     """
     parent = self._parent
     # Only the process that created this object may start it.
     assert parent is __mp_dummy.current_process()
     self._start_called = True
     # Register with the parent when it tracks its children.
     if hasattr(parent, '_children'):
         parent._children[self] = None
     __mp_dummy.threading.Thread.start(self)