Example #1
def check_open_ports(url: str,
                     ip: str,
                     path: Optional[str] = None) -> List[Result]:
    results = []

    # create processing pool
    pool = Pool(os.cpu_count() * 2)
    mgr = Manager()
    queue = mgr.Queue()

    # read the data in from the data directory
    if path is None:
        file_path = pkg_resources.resource_filename(
            "yawast", "resources/common_ports.json")
    else:
        file_path = path

    with open(file_path) as json_file:
        data = json.load(json_file)

    for rec in data:
        pool.apply_async(_is_port_open, (url, ip, rec, queue))

    pool.close()
    pool.join()

    while not queue.empty():
        val = queue.get()
        if val is not None:
            results.append(val)

    return results
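
Example #1 fans out one apply_async call per port record and funnels the results back through a Manager queue. A minimal, self-contained sketch of the same fan-out/collect pattern (the worker and its payload below are hypothetical stand-ins, not the _is_port_open used above):

import os
from multiprocessing import Pool, Manager


def _check_item(item, queue):
    # Hypothetical worker: put a result on the shared queue,
    # or None when there is nothing to report.
    queue.put(item * 2 if item % 2 == 0 else None)


def collect_results(items):
    results = []
    pool = Pool(os.cpu_count() * 2)
    queue = Manager().Queue()

    for item in items:
        pool.apply_async(_check_item, (item, queue))

    pool.close()
    pool.join()

    while not queue.empty():
        val = queue.get()
        if val is not None:
            results.append(val)
    return results


if __name__ == '__main__':
    print(collect_results(range(10)))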
Example #2
    def __init__(self,
                 file_list,
                 mode,
                 database_path,
                 auto_tags=True,
                 temp_dir=None):
        # Have the GUI pass a list of files straight to here
        # Then, on the basis of what is needed, pass the
        # filenames to the requisite functions
        # This includes getting file info for the database
        # Parsing for the reader proper
        # Caching upon closing
        self.file_list = [i for i in file_list if os.path.exists(i)]
        self.statistics = [0, (len(file_list))]
        self.hashes_and_paths = {}
        self.work_mode = mode[0]
        self.addition_mode = mode[1]
        self.database_path = database_path
        self.auto_tags = auto_tags
        self.temp_dir = temp_dir
        if database_path:
            self.database_hashes()

        self.threading_completed = []
        self.queue = Manager().Queue()
        self.processed_books = []

        if self.work_mode == 'addition':
            progress_object_generator()
Example #3
    def __init__(self, file_list, mode, database_path, settings, temp_dir=None):
        # Have the GUI pass a list of files straight to here
        # Then, on the basis of what is needed, pass the
        # filenames to the requisite functions
        # This includes getting file info for the database
        # Parsing for the reader proper
        # Caching upon closing
        self.file_list = [i for i in file_list if os.path.exists(i)]
        self.statistics = [0, (len(file_list))]
        self.hashes_and_paths = {}
        self.work_mode = mode[0]
        self.addition_mode = mode[1]
        self.database_path = database_path
        self.auto_tags = settings['auto_tags']
        self.auto_cover = settings['auto_cover']
        self.temp_dir = temp_dir
        if database_path:
            self.database_hashes()

        self.threading_completed = []
        self.queue = Manager().Queue()
        self.errors = Manager().list()
        self.processed_books = []

        if self.work_mode == 'addition':
            progress_object_generator()
Example #4
def main():
    manager = Manager()
    lock = manager.Lock()
    partial_get_index = functools.partial(get_index, lock)
    pool = Pool()
    pool.map(partial_get_index, [i for i in range(START_PAGE, END_PAGE + 1)])
    pool.close()
    pool.join()
    print('over')
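
Example #4 shares a Manager lock with every pool worker by binding it as the first argument with functools.partial before calling map. A small sketch of the same arrangement (get_index here is a hypothetical stand-in, since the original worker is not shown):

import functools
from multiprocessing import Pool, Manager


def get_index(lock, page):
    # Hypothetical worker: the shared lock serialises access to a
    # common resource such as a file or stdout.
    with lock:
        print('fetching page', page)


if __name__ == '__main__':
    manager = Manager()
    lock = manager.Lock()
    partial_get_index = functools.partial(get_index, lock)
    with Pool() as pool:
        pool.map(partial_get_index, range(1, 6))
    print('over')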
Example #5
    def calc(self, computeOptions, **args):
        tiles = self._get_tile_service().get_tiles_bounded_by_box(computeOptions.get_min_lat(), computeOptions.get_max_lat(),
                                                            computeOptions.get_min_lon(), computeOptions.get_max_lon(),
                                                            computeOptions.get_dataset()[0],
                                                            computeOptions.get_start_time(),
                                                            computeOptions.get_end_time())

        if len(tiles) == 0:
            raise NexusProcessingException.NoDataException(reason="No data found for selected timeframe")

        maxprocesses = int(self.algorithm_config.get("multiprocessing", "maxprocesses"))

        results = []
        if maxprocesses == 1:
            calculator = LongitudeHofMoellerCalculator()
            for x, tile in enumerate(tiles):
                result = calculator.longitude_time_hofmoeller_stats(tile, x)
                results.append(result)
        else:
            manager = Manager()
            work_queue = manager.Queue()
            done_queue = manager.Queue()
            for x, tile in enumerate(tiles):
                work_queue.put(
                    ('longitude_time_hofmoeller_stats', tile, x))
            [work_queue.put(SENTINEL) for _ in range(0, maxprocesses)]

            # Start new processes to handle the work
            pool = Pool(maxprocesses)
            [pool.apply_async(pool_worker, (LONGITUDE, work_queue, done_queue)) for _ in range(0, maxprocesses)]
            pool.close()

            # Collect the results
            for x, tile in enumerate(tiles):
                result = done_queue.get()
                try:
                    error_str = result['error']
                    logger.error(error_str)
                    raise NexusProcessingException(reason="Error calculating longitude_time_hofmoeller_stats.")
                except KeyError:
                    pass

                results.append(result)

            pool.terminate()
            manager.shutdown()

        results = sorted(results, key=lambda entry: entry["time"])

        results = self.applyDeseasonToHofMoeller(results, pivot="lons")

        result = HoffMoellerResults(results=results, computeOptions=computeOptions, type=HoffMoellerResults.LONGITUDE)
        return result
Example #6
class ThreadingManager(object):
    """
    Usage:
    tm = ThreadingManager(2, get_detections=cerberus_api.get_detection)
    u = tm.queued_executor('get_detections', [777, 125])

    """
    def __init__(self, cpu_count=cpu_count(), **kwargs):
        self.m = Manager()
        self.pool = self.m.Pool(cpu_count)
        self.kwargs = kwargs
        self.queue = self.m.Queue()
        self.log_queue = self.m.Queue()

    def executor(self, function, *args, **kwargs):
        result = self.pool.map(self.kwargs[function], *args, **kwargs)
        return result

    def queued_executor(self, function, *args, **kwargs):
        result = self.pool.map(self.kwargs[function], *args, **kwargs)
        [self.queue.put(i) for i in result]
        return self.queue

    def async_executor(self, function, *args, **kwargs):
        result = self.pool.map_async(self.kwargs[function], *args, **kwargs)
        sys.stdout.flush()
        return result

    def star_executor(self, function, *args, **kwargs):
        result = self.pool.starmap(self.kwargs[function], *args, **kwargs)
        return result

    def star_queued_executor(self, function, *args, **kwargs):
        result = self.pool.starmap(self.kwargs[function], *args, **kwargs)
        [self.queue.put(i) for i in result]
        return self.queue

    def async_star_executor(self, function, *args, **kwargs):
        result = self.pool.starmap_async(self.kwargs[function], *args,
                                         **kwargs)
        return result

    def async_star_queued_executor(self, function, *args, **kwargs):
        result = self.pool.starmap_async(self.kwargs[function], *args,
                                         **kwargs)
        [self.queue.put(i) for i in result]
        return self.queue

    def close(self, exception_type, exception_value, traceback):
        self.pool._maintain_pool()
        self.pool.close()
        self.pool.join()
Example #7
    def get_daily_difference_average_for_box(self, min_lat, max_lat, min_lon,
                                             max_lon, dataset1, dataset2,
                                             start_time, end_time):

        daysinrange = self._tile_service.find_days_in_range_asc(
            min_lat, max_lat, min_lon, max_lon, dataset1, start_time, end_time)

        maxprocesses = int(
            self.algorithm_config.get("multiprocessing", "maxprocesses"))

        if maxprocesses == 1:
            calculator = DailyDifferenceAverageCalculator()
            averagebyday = []
            for dayinseconds in daysinrange:
                result = calculator.calc_average_diff_on_day(
                    min_lat, max_lat, min_lon, max_lon, dataset1, dataset2,
                    dayinseconds)
                averagebyday.append((result[0], result[1]))
        else:
            # Create a task to calc average difference for each day
            manager = Manager()
            work_queue = manager.Queue()
            done_queue = manager.Queue()
            for dayinseconds in daysinrange:
                work_queue.put(
                    ('calc_average_diff_on_day', min_lat, max_lat, min_lon,
                     max_lon, dataset1, dataset2, dayinseconds))
            [work_queue.put(SENTINEL) for _ in xrange(0, maxprocesses)]

            # Start new processes to handle the work
            pool = Pool(maxprocesses)
            [
                pool.apply_async(pool_worker, (work_queue, done_queue))
                for _ in xrange(0, maxprocesses)
            ]
            pool.close()

            # Collect the results as [(day (in ms), average difference for that day)]
            averagebyday = []
            for i in xrange(0, len(daysinrange)):
                result = done_queue.get()
                if result[0] == 'error':
                    print >> sys.stderr, result[1]
                    raise NexusProcessingException(
                        reason="Error calculating average by day.")
                rdata = result
                averagebyday.append((rdata[0], rdata[1]))

            pool.terminate()
            manager.shutdown()

        return averagebyday
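
The example above (like Examples #16 and #18 further down) fills a work queue, appends one SENTINEL per process, and relies on a pool_worker that is not shown. A generic sketch of such a sentinel-terminated worker loop, with the per-task computation left as a placeholder (the result and error shapes vary between the examples on this page):

SENTINEL = 'STOP'


def pool_worker(work_queue, done_queue):
    # Hypothetical worker: pull tasks until the sentinel arrives and
    # report either a placeholder result or an error marker.
    while True:
        task = work_queue.get()
        if task == SENTINEL:
            break
        try:
            name, *task_args = task
            done_queue.put((name, task_args))
        except Exception as e:
            done_queue.put(('error', str(e)))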
Example #8
def main():
    old_folder_name = "Test"
    new_folder_name = old_folder_name + "_copy"
    os.mkdir(new_folder_name)
    pool = Pool(5)
    queue = Manager().Queue()
    file_list = os.listdir(old_folder_name)
    file_size = len(file_list)
    for file in file_list:
        pool.apply_async(copy_file_worker, (file, old_folder_name, new_folder_name, queue))
    num = 0
    while num < file_size:
        queue.get()
        num += 1
        print("Copy Rate: %.2f%%" % (num / file_size * 100))
    print("Done!")
Example #9
def process_venue_list(venue_id_list):
    pool = ThreadPool(5)
    m = ThreadManager()
    d = m.dict()

    for venue_id in venue_id_list:
        d[venue_id] = 'none'

    print("[.] Processing %s venues" % len(venue_id_list))
    result = pool.map_async(process_venue,
                            zip(venue_id_list, itertools.repeat(d)))
    monitor_map_progress(result, d, len(venue_id_list))

    result.wait()
    _ = result.get()

    print("[x] Done with %s venues" % len(venue_id_list))
Example #10
def new_send_email(email):
    if not IS_ACTIVE:
        new_send_email.queue = Manager().Queue()
        process = Process(target=process_sent_queue,
                          args=(new_send_email.queue, ))
        process.daemon = True
        process.start()
        models.IS_ACTIVE = True
    new_send_email.queue.put(email)
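
process_sent_queue is not shown in Example #10; a minimal sketch of the daemon consumer it implies, with the actual delivery step reduced to a print:

def process_sent_queue(queue):
    # Hypothetical consumer: block on the Manager queue and handle
    # each e-mail as it arrives.
    while True:
        email = queue.get()
        print('sending', email)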
Example #11
    def __init__(self, file_list, n_treads, q):
        assert isinstance(file_list, (list, tuple))
        assert isinstance(n_treads, int)
        assert n_treads >= 0
        assert type(q) is Manager().Queue

        self.is_active = True
        self.file_list = file_list
        self.n_treads = n_treads
        self.q = q
Example #12
def parallel_parsing(directory):
    trees = list()
    input_files = [java_file for java_file in get_java_files(directory)]
    # pool = Pool(processes=8)
    # x = pool.apply_async(create_project_parse_tree, args=(input_files[0], trees))

    # for java_file in get_java_files(directory):
    #     res = pool.apply_async(create_project_parse_tree, (java_file,))  # runs in *only* one process
    #     print(type(res.get(timeout=1)))  #

    # num = Value('d', 0.0)
    # arr = Array('i', range(500))
    with Manager() as manager:
        d = manager.dict()
        q = manager.Queue(10)
        p = Process(target=create_project_parse_tree, args=(input_files[0], q))
        p.start()
        p.join()
        print(q)
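
create_project_parse_tree is not included in Example #12, and print(q) only prints the queue proxy rather than its contents. A stand-in worker plus a draining step might look like this (all names, and the "parsing" itself, are assumptions):

def create_project_parse_tree(java_file, queue):
    # Hypothetical worker: "parse" one file and report the outcome
    # through the Manager queue.
    with open(java_file, encoding='utf-8', errors='ignore') as handle:
        queue.put((java_file, len(handle.read())))


# After p.join(), drain the queue instead of printing the proxy:
#     while not q.empty():
#         print(q.get())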
Example #13
            return True
        # return "done"

    def _upload_pool(self):
        pool = ThreadPool(processes=self.n_treads)
        res = pool.map(self._upload_file, self.file_list)
        n_res = len(res)
        res_dict = Counter(res)
        succes = res_dict[True]
        self.q.put("Uploaded {}/{}".format(succes, n_res))
        self.is_active = False

    def start(self):
        proc = Process(target=self._upload_pool)
        proc.start()


if __name__ == '__main__':
    q = Manager().Queue()

    files_list = [
        os.path.join(os.getcwd(), "test_data", i)
        for i in os.listdir("./test_data")
    ]
    uploader = Uploader(files_list, 2, q)
    uploader.start()

    while uploader.is_active:
        progress = q.get()
        print(progress)
Example #14
    dealdata = tuple(tuple([y for y in x]) for x in data)
    try:
        # Execute the SQL statement
        cursor.executemany(sql, dealdata)
        # Commit to the database
        db.commit()
    except pm.Warning as w:
        print(repr(w))
        # Roll back if an error occurs
        db.rollback()


if __name__ == '__main__':
    # pool = ThreadPool(4)
    # page = [x + 1 for x in range(18)]
    manager = Manager()
    lock = manager.Lock()
    # pool = ThreadPool(processes=8)
    for i in range(1, 20):
        # pool.apply_async(spider, (i, lock))
        spider(i)
    print(list)
    for i in list:
        if i == 21:
            continue
        else:
            dealdata(i)

    # results = pool.map(spider ,page)
    # pool.close()
    # pool.join()
Example #15
class BookSorter:
    def __init__(self,
                 file_list,
                 mode,
                 database_path,
                 settings,
                 temp_dir=None):
        # Have the GUI pass a list of files straight to here
        # Then, on the basis of what is needed, pass the
        # filenames to the requisite functions
        # This includes getting file info for the database
        # Parsing for the reader proper
        # Caching upon closing
        self.file_list = [i for i in file_list if os.path.exists(i)]
        self.statistics = [0, (len(file_list))]
        self.hashes_and_paths = {}
        self.work_mode = mode[0]
        self.addition_mode = mode[1]
        self.database_path = database_path
        self.auto_tags = settings['auto_tags']
        self.auto_cover = settings['auto_cover']
        self.temp_dir = temp_dir
        if database_path:
            self.database_hashes()

        self.threading_completed = []
        self.queue = Manager().Queue()
        self.errors = Manager().list()
        self.processed_books = []

        if self.work_mode == 'addition':
            progress_object_generator()

    def database_hashes(self):
        all_hashes_and_paths = database.DatabaseFunctions(
            self.database_path).fetch_data(('Hash', 'Path'), 'books',
                                           {'Hash': ''}, 'LIKE')

        if all_hashes_and_paths:
            self.hashes_and_paths = {i[0]: i[1] for i in all_hashes_and_paths}

    def database_entry_for_book(self, file_hash):
        database_return = database.DatabaseFunctions(
            self.database_path).fetch_data(
                ('Title', 'Author', 'Year', 'ISBN', 'Tags', 'Position',
                 'Bookmarks', 'CoverImage', 'Annotations'), 'books',
                {'Hash': file_hash}, 'EQUALS')[0]

        book_data = []

        for count, i in enumerate(database_return):
            if count in (
                    5, 6,
                    8):  # Position, Bookmarks, and Annotations are pickled
                if i:
                    book_data.append(pickle.loads(i))
                else:
                    book_data.append(None)
            else:
                book_data.append(i)

        return book_data

    def read_book(self, filename):
        # filename is expected as a string containing the
        # full path of the ebook file

        with open(filename, 'rb') as current_book:
            # This should speed up addition for larger files
            # without compromising the integrity of the process
            first_bytes = current_book.read(1024 *
                                            32)  # First 32KB of the file
            file_md5 = hashlib.md5(first_bytes).hexdigest()

        # Update the progress queue
        self.queue.put(filename)

        # This should not get triggered in reading mode
        # IF the file is NOT being loaded into the reader

        # Do not allow addition in case the file
        # is already in the database and it remains at its original path
        if self.work_mode == 'addition' and file_md5 in self.hashes_and_paths:
            if (self.hashes_and_paths[file_md5] == filename
                    or os.path.exists(self.hashes_and_paths[file_md5])):

                if not self.hashes_and_paths[file_md5] == filename:
                    warning_string = (
                        f'{os.path.basename(filename)} is already in database')
                    logger.warning(warning_string)
                return

        # This allows for eliminating issues with filenames that have
        # a dot in them. All hail the roundabout fix.
        valid_extension = False
        for i in sorter:
            if os.path.basename(filename).endswith(i):
                file_extension = i
                valid_extension = True
                break

        if not valid_extension:
            this_error = 'Unsupported extension: ' + filename
            self.errors.append(this_error)
            logger.error(this_error)
            return

        book_ref = sorter[file_extension](filename, self.temp_dir, file_md5)

        # None of the following have an exception type specified
        # This will keep everything from crashing, but will make
        # troubleshooting difficult
        # TODO
        # In application notifications

        try:
            book_ref.read_book()
        except Exception as e:
            this_error = f'Error initializing: {filename}'
            self.errors.append(this_error)
            logger.exception(this_error +
                             f' {type(e).__name__} Arguments: {e.args}')
            return

        this_book = {}
        this_book[file_md5] = {'hash': file_md5, 'path': filename}

        # Different modes require different values
        if self.work_mode == 'addition':
            try:
                metadata = book_ref.generate_metadata()
            except Exception as e:
                this_error = f'Metadata generation error: {filename}'
                self.errors.append(this_error)
                logger.exception(this_error +
                                 f' {type(e).__name__} Arguments: {e.args}')
                return

            title = metadata.title
            author = metadata.author
            year = metadata.year
            isbn = metadata.isbn

            tags = None
            if self.auto_tags:
                tags = metadata.tags

            cover_image_raw = metadata.cover
            if cover_image_raw:
                cover_image = resize_image(cover_image_raw)
            else:
                cover_image = None
                if self.auto_cover:
                    cover_image = fetch_cover(title, author)

            this_book[file_md5]['cover_image'] = cover_image
            this_book[file_md5]['addition_mode'] = self.addition_mode

        if self.work_mode == 'reading':
            try:
                book_breakdown = book_ref.generate_content()
            except Exception as e:
                this_error = f'Content generation error: {filename}'
                self.errors.append(this_error)
                logger.exception(this_error +
                                 f' {type(e).__name__} Arguments: {e.args}')
                return

            toc = book_breakdown[0]
            content = book_breakdown[1]
            images_only = book_breakdown[2]

            try:
                book_data = self.database_entry_for_book(file_md5)
            except TypeError:
                logger.error(
                    f'Database error: {filename}. Re-add book to program')
                return

            title = book_data[0].replace('&', '&&')
            author = book_data[1]
            year = book_data[2]
            isbn = book_data[3]
            tags = book_data[4]
            position = book_data[5]
            bookmarks = book_data[6]
            cover = book_data[7]
            annotations = book_data[8]

            this_book[file_md5]['position'] = position
            this_book[file_md5]['bookmarks'] = bookmarks
            this_book[file_md5]['toc'] = toc
            this_book[file_md5]['content'] = content
            this_book[file_md5]['images_only'] = images_only
            this_book[file_md5]['cover'] = cover
            this_book[file_md5]['annotations'] = annotations

        this_book[file_md5]['title'] = title
        this_book[file_md5]['author'] = author
        this_book[file_md5]['year'] = year
        this_book[file_md5]['isbn'] = isbn
        this_book[file_md5]['tags'] = tags

        return this_book

    def read_progress(self):
        while True:
            processed_file = self.queue.get()
            self.threading_completed.append(processed_file)

            total_number = len(self.file_list)
            completed_number = len(self.threading_completed)

            # Just for the record, this slows down book searching by about 20%
            if _progress_emitter:  # Skip update in reading mode
                _progress_emitter.update_progress(completed_number * 100 //
                                                  total_number)

            if total_number == completed_number:
                break

    def initiate_threads(self):
        if not self.file_list:
            return None

        def pool_creator():
            _pool = Pool(thread_count)
            self.processed_books = _pool.map(self.read_book, self.file_list)

            _pool.close()
            _pool.join()

        start_time = time.time()

        worker_thread = threading.Thread(target=pool_creator)
        progress_thread = threading.Thread(target=self.read_progress)
        worker_thread.start()
        progress_thread.start()

        worker_thread.join()
        progress_thread.join(timeout=.5)

        return_books = {}
        # Exclude None returns generated in case of duplication / parse errors
        self.processed_books = [i for i in self.processed_books if i]
        for i in self.processed_books:
            for j in i:
                return_books[j] = i[j]

        del self.processed_books
        processing_time = str(time.time() - start_time)
        logger.info('Finished processing in ' + processing_time)

        return return_books, self.errors
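
Examples #2, #3 and #15 all pair a thread that maps read_book over a pool with a second thread that drains the Manager queue for progress updates. A stripped-down sketch of that arrangement, using multiprocessing.dummy and a trivial stand-in for the per-file work:

import threading
from multiprocessing.dummy import Pool, Manager


class ProgressSorter:
    # Hypothetical stand-in for BookSorter: one thread maps the work,
    # another drains the queue to report progress.
    def __init__(self, items):
        self.items = items
        self.queue = Manager().Queue()
        self.results = []

    def process_item(self, item):
        self.queue.put(item)      # progress update
        return item * item        # stand-in for per-file parsing

    def report_progress(self):
        done = 0
        while done < len(self.items):
            self.queue.get()
            done += 1
            print('progress: {}%'.format(done * 100 // len(self.items)))

    def run(self):
        def pool_creator():
            with Pool(4) as pool:
                self.results = pool.map(self.process_item, self.items)

        worker_thread = threading.Thread(target=pool_creator)
        progress_thread = threading.Thread(target=self.report_progress)
        worker_thread.start()
        progress_thread.start()
        worker_thread.join()
        progress_thread.join()
        return self.results


if __name__ == '__main__':
    print(ProgressSorter(list(range(10))).run())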
Example #16
    def getTimeSeriesStatsForBoxSingleDataSet(self, bounding_polygon, ds, start_seconds_from_epoch,
                                              end_seconds_from_epoch,
                                              apply_seasonal_cycle_filter=True, apply_low_pass_filter=True):

        the_time = datetime.now()
        daysinrange = self._get_tile_service().find_days_in_range_asc(bounding_polygon.bounds[1],
                                                                bounding_polygon.bounds[3],
                                                                bounding_polygon.bounds[0],
                                                                bounding_polygon.bounds[2],
                                                                ds,
                                                                start_seconds_from_epoch,
                                                                end_seconds_from_epoch)
        logger.info("Finding days in range took %s for dataset %s" % (str(datetime.now() - the_time), ds))

        if len(daysinrange) == 0:
            raise NoDataException(reason="No data found for selected timeframe")

        the_time = datetime.now()
        maxprocesses = int(self.algorithm_config.get("multiprocessing", "maxprocesses"))

        results = []
        if maxprocesses == 1:
            calculator = TimeSeriesCalculator()
            for dayinseconds in daysinrange:
                result = calculator.calc_average_on_day(bounding_polygon.wkt, ds, dayinseconds)
                results += [result] if result else []
        else:
            # Create a task to calc average difference for each day
            manager = Manager()
            work_queue = manager.Queue()
            done_queue = manager.Queue()
            for dayinseconds in daysinrange:
                work_queue.put(
                    ('calc_average_on_day', bounding_polygon.wkt, ds, dayinseconds))
            [work_queue.put(SENTINEL) for _ in range(0, maxprocesses)]

            # Start new processes to handle the work
            pool = Pool(maxprocesses)
            [pool.apply_async(pool_worker, (work_queue, done_queue)) for _ in range(0, maxprocesses)]
            pool.close()

            # Collect the results as [(day (in ms), average difference for that day)]
            for i in range(0, len(daysinrange)):
                result = done_queue.get()
                try:
                    error_str = result['error']
                    logger.error(error_str)
                    raise NexusProcessingException(reason="Error calculating average by day.")
                except KeyError:
                    pass

                results += [result] if result else []

            pool.terminate()
            manager.shutdown()

        results = sorted(results, key=lambda entry: entry["time"])
        logger.info("Time series calculation took %s for dataset %s" % (str(datetime.now() - the_time), ds))

        if apply_seasonal_cycle_filter:
            the_time = datetime.now()
            for result in results:
                month = datetime.utcfromtimestamp(result['time']).month
                month_mean, month_max, month_min = self.calculate_monthly_average(month, bounding_polygon.wkt, ds)
                seasonal_mean = result['mean'] - month_mean
                seasonal_min = result['min'] - month_min
                seasonal_max = result['max'] - month_max
                result['meanSeasonal'] = seasonal_mean
                result['minSeasonal'] = seasonal_min
                result['maxSeasonal'] = seasonal_max
            logger.info(
                "Seasonal calculation took %s for dataset %s" % (str(datetime.now() - the_time), ds))

        the_time = datetime.now()
        filtering.applyAllFiltersOnField(results, 'mean', applySeasonal=False, applyLowPass=apply_low_pass_filter)
        filtering.applyAllFiltersOnField(results, 'max', applySeasonal=False, applyLowPass=apply_low_pass_filter)
        filtering.applyAllFiltersOnField(results, 'min', applySeasonal=False, applyLowPass=apply_low_pass_filter)

        if apply_seasonal_cycle_filter and apply_low_pass_filter:
            try:
                filtering.applyFiltersOnField(results, 'meanSeasonal', applySeasonal=False, applyLowPass=True,
                                              append="LowPass")
                filtering.applyFiltersOnField(results, 'minSeasonal', applySeasonal=False, applyLowPass=True,
                                              append="LowPass")
                filtering.applyFiltersOnField(results, 'maxSeasonal', applySeasonal=False, applyLowPass=True,
                                              append="LowPass")
            except Exception as e:
                # If it doesn't work log the error but ignore it
                tb = traceback.format_exc()
                logger.warn("Error calculating SeasonalLowPass filter:\n%s" % tb)

        logger.info(
            "LowPass filter calculation took %s for dataset %s" % (str(datetime.now() - the_time), ds))

        return results, {}
Example #17
    def __init__(self, cpu_count=cpu_count(), **kwargs):
        self.m = Manager()
        self.pool = self.m.Pool(cpu_count)
        self.kwargs = kwargs
        self.queue = self.m.Queue()
        self.log_queue = self.m.Queue()
Example #18
    def getTimeSeriesStatsForBoxSingleDataSet(self,
                                              min_lat,
                                              max_lat,
                                              min_lon,
                                              max_lon,
                                              ds,
                                              start_time=0,
                                              end_time=-1,
                                              applySeasonalFilter=True,
                                              applyLowPass=True):

        daysinrange = self._get_tile_service().find_days_in_range_asc(
            min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time)

        if len(daysinrange) == 0:
            raise NoDataException(
                reason="No data found for selected timeframe")

        maxprocesses = int(
            self.algorithm_config.get("multiprocessing", "maxprocesses"))

        results = []
        if maxprocesses == 1:
            calculator = TimeSeriesCalculator()
            for dayinseconds in daysinrange:
                result = calculator.calc_average_on_day(
                    min_lat, max_lat, min_lon, max_lon, ds, dayinseconds)
                results.append(result)
        else:
            # Create a task to calc average difference for each day
            manager = Manager()
            work_queue = manager.Queue()
            done_queue = manager.Queue()
            for dayinseconds in daysinrange:
                work_queue.put(('calc_average_on_day', min_lat, max_lat,
                                min_lon, max_lon, ds, dayinseconds))
            [work_queue.put(SENTINEL) for _ in range(0, maxprocesses)]

            # Start new processes to handle the work
            pool = Pool(maxprocesses)
            [
                pool.apply_async(pool_worker, (work_queue, done_queue))
                for _ in range(0, maxprocesses)
            ]
            pool.close()

            # Collect the results as [(day (in ms), average difference for that day)]
            for i in range(0, len(daysinrange)):
                result = done_queue.get()
                try:
                    error_str = result['error']
                    logger.error(error_str)
                    raise NexusProcessingException(
                        reason="Error calculating average by day.")
                except KeyError:
                    pass

                results.append(result)

            pool.terminate()
            manager.shutdown()

        results = sorted(results, key=lambda entry: entry["time"])

        filt.applyAllFiltersOnField(results,
                                    'mean',
                                    applySeasonal=applySeasonalFilter,
                                    applyLowPass=applyLowPass)
        filt.applyAllFiltersOnField(results,
                                    'max',
                                    applySeasonal=applySeasonalFilter,
                                    applyLowPass=applyLowPass)
        filt.applyAllFiltersOnField(results,
                                    'min',
                                    applySeasonal=applySeasonalFilter,
                                    applyLowPass=applyLowPass)

        return results, {}
Example #19
    def get_init_params(self, manager: Manager):
        d = {}
        if manager is not None:
            d = manager.dict()

        return None, self.zoom_offset, self.max_zoom_level, d
Example #20
        'phone_num':
        'M6Y3WpfSNET9W4ZwcML1tUx+jvOWtaDKwoUM3ABM+o7AXi8yZKplkUSM3u3R9cN+x4CNZ2Mo/SHFqB8nQWNt9WHEKc3iC0nSfTfbhlLJECCLpB60Cpbo7HKjE9dlY8s7kJY8bCn+xHAXEGg/2avB2SRPFLPo+Nm0JO6R07Sof4U=',
        'pwd':
        'OKNkTFqOU26Adb/9IAvze4K+u6aBHpd9cvBuyRWWAifDyb48wAvLbGUHfj0ZtTvGdg3Y2k8x9EyzcvW/G36R9ukCVpa+xJFztKM8GIl1q71OPNSTx0u1+EM6JiZnGxvPWApt0coRLm64BkRBcbhgliSauUlheBBfoAIADSNlXpw='
    }

    headers['Cookie'] = get_Cookie(form_data, headers)
    if headers['Cookie'] is None:
        print('Cookie is None')
        sys.exit()

    # url = 'https://api.xiaoheihe.cn/game/pubg/get_stats_detail/?lang=zh-cn&os_type=iOS&os_version=10.3.3&_time=1548776142&version=1.1.52&device_id=D2AA4D4F-AC80-476C-BFE1-CBD83AB74133&heybox_id=5141514&hkey=06a344301cb7c6cdc1136a62c061c978&fpp=0&mode=solo&nickname=HuYaTV_15310849&region=steam&season=pc-2018-02
    # The URL above came from packet capture; testing showed the other parameters can be omitted, the URL below is enough
    url1 = 'https://api.xiaoheihe.cn/game/pubg/get_stats_detail/?heybox_id=14909789&region=steam'
    put_thread_pool = ThreadPool(5)
    get_thread_pool = ThreadPool()
    queue = ThreadManager().Queue()  # communication between thread pools requires Manager().Queue(); ordinary threads can use Queue()
    for i in range(len(par)):
        # A thread pool makes it easy to cap the number of threads: the producer pool runs at most five producer threads, which keeps throughput high without wasting time on thread switching
        put_thread_pool.apply_async(put_into_pool,
                                    (queue, url1, par[i], headers))

    time.sleep(5)  # let the producers run for 5 s first so the queue has some initial data

    get_thread_pool.apply_async(get_from_pool, (queue, db))

    queue.join()  # this blocks the main thread

    db.close()
Example #21
import boto3
import botocore
import random

from botocore.client import Config
from botocore.endpoint import MAX_POOL_CONNECTIONS
from multiprocessing import TimeoutError
from multiprocessing.dummy import Pool as ThreadPool, Manager, Value

from enumerate_iam.utils.remove_metadata import remove_metadata
from enumerate_iam.utils.json_utils import json_encoder
from enumerate_iam.bruteforce_tests import BRUTEFORCE_TESTS

MAX_THREADS = 12
CLIENT_POOL = {}
MANAGER = Manager()
STOP_SIGNAL = MANAGER.Value('i', 0)


def report_arn(candidate):
    """
    Attempt to extract and slice up an ARN from the input string
    """
    logger = logging.getLogger()

    arn_search = re.search(r'.*(arn:aws:.*?) .*', candidate)

    if arn_search:
        arn = arn_search.group(1)

        arn_id = arn.split(':')[4]
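
Example #21 keeps a module-level stop flag in MANAGER.Value('i', 0) from multiprocessing.dummy. A minimal sketch of how such a flag can be polled by thread-pool workers and flipped to cut a run short (the worker and its task names are hypothetical):

from multiprocessing.dummy import Pool as ThreadPool, Manager

MANAGER = Manager()
STOP_SIGNAL = MANAGER.Value('i', 0)


def probe(action):
    # Hypothetical worker: skip the work once any worker has set the flag.
    if STOP_SIGNAL.value:
        return None
    if action == 'bad-credentials':
        STOP_SIGNAL.value = 1  # ask the remaining workers to bail out
        return None
    return action


if __name__ == '__main__':
    with ThreadPool(4) as pool:
        print(pool.map(probe, ['s3:ListBuckets', 'bad-credentials',
                               'ec2:DescribeInstances']))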
Example #22
import json
import logging
from multiprocessing.dummy import Manager
import random
from urllib.parse import urlparse, parse_qs
from config.config import BASEPYCURLCONFIG
from core import MySpider
from core.Spider import CrawlJob

__author__ = 'Florian'
m = Manager()
crawled = m.dict()

logger = logging.getLogger("logger")
fh = logging.FileHandler("clubs.jsonl", 'a+')
simpleFormat = logging.Formatter("%(message)s")
fh.setFormatter(simpleFormat)
fh.setLevel(logging.WARNING)
ch = logging.StreamHandler()
ch.setFormatter(simpleFormat)
ch.setLevel(logging.DEBUG)
logger.addHandler(fh)
logger.addHandler(ch)
logger.setLevel(logging.DEBUG)


@MySpider.QueueInitializer.register()
def seeds():
    jobs = []
    logger = logging.getLogger("logger")
    for federation in [
Example #23
import datetime
from multiprocessing.dummy import Pool as ThreadPool
from multiprocessing.dummy import Manager
import openalpr_api
from openalpr_api.rest import ApiException
from sinesp_client import SinespClient
from requests.packages.urllib3.exceptions import InsecureRequestWarning
import requests
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

# initiating globals
now = datetime.datetime.now()
sc = SinespClient()
pool = ThreadPool(4)  # number of threads for parallelism
lock = Manager().Lock()


def process_cars(car_images):
    return pool.map(process_car_image, car_images)


def process_car_image(image_path):
    # create an instance of the API class
    api_instance = openalpr_api.DefaultApi()
    secret_key = 'sk_4fb6feea66fb6b5524b2d984'  # str | The secret key used to authenticate your account.  You can view your  secret key by visiting  https://cloud.openalpr.com/
    country = 'br'  # str | Defines the training data used by OpenALPR.  \"us\" analyzes  North-American style plates.  \"eu\" analyzes European-style plates.  This field is required if using the \"plate\" task  You may use multiple datasets by using commas between the country  codes.  For example, 'au,auwide' would analyze using both the  Australian plate styles.  A full list of supported country codes  can be found here https://github.com/openalpr/openalpr/tree/master/runtime_data/config
    recognize_vehicle = 0  # int | If set to 1, the vehicle will also be recognized in the image This requires an additional credit per request  (optional) (default to 0)
    state = ''  # str | Corresponds to a US state or EU country code used by OpenALPR pattern  recognition.  For example, using \"md\" matches US plates against the  Maryland plate patterns.  Using \"fr\" matches European plates against  the French plate patterns.  (optional) (default to )
    return_image = 0  # int | If set to 1, the image you uploaded will be encoded in base64 and  sent back along with the response  (optional) (default to 0)
    topn = 10  # int | The number of results you would like to be returned for plate  candidates and vehicle classifications  (optional) (default to 10)
    prewarp = ''  # str | Prewarp configuration is used to calibrate the analyses for the  angle of a particular camera.  More information is available here http://doc.openalpr.com/accuracy_improvements.html#calibration  (optional) (default to )
Example #24
class BookSorter:
    def __init__(self,
                 file_list,
                 mode,
                 database_path,
                 auto_tags=True,
                 temp_dir=None):
        # Have the GUI pass a list of files straight to here
        # Then, on the basis of what is needed, pass the
        # filenames to the requisite functions
        # This includes getting file info for the database
        # Parsing for the reader proper
        # Caching upon closing
        self.file_list = [i for i in file_list if os.path.exists(i)]
        self.statistics = [0, (len(file_list))]
        self.hashes_and_paths = {}
        self.work_mode = mode[0]
        self.addition_mode = mode[1]
        self.database_path = database_path
        self.auto_tags = auto_tags
        self.temp_dir = temp_dir
        if database_path:
            self.database_hashes()

        self.threading_completed = []
        self.queue = Manager().Queue()
        self.processed_books = []

        if self.work_mode == 'addition':
            progress_object_generator()

    def database_hashes(self):
        all_hashes_and_paths = database.DatabaseFunctions(
            self.database_path).fetch_data(('Hash', 'Path'), 'books',
                                           {'Hash': ''}, 'LIKE')

        if all_hashes_and_paths:
            # self.hashes = [i[0] for i in all_hashes]
            self.hashes_and_paths = {i[0]: i[1] for i in all_hashes_and_paths}

    def database_entry_for_book(self, file_hash):
        database_return = database.DatabaseFunctions(
            self.database_path).fetch_data(
                ('Title', 'Author', 'Year', 'ISBN', 'Tags', 'Position',
                 'Bookmarks', 'CoverImage', 'Annotations'), 'books',
                {'Hash': file_hash}, 'EQUALS')[0]

        book_data = []

        for count, i in enumerate(database_return):
            if count in (
                    5, 6,
                    8):  # Position, Bookmarks, and Annotations are pickled
                if i:
                    book_data.append(pickle.loads(i))
                else:
                    book_data.append(None)
            else:
                book_data.append(i)

        return book_data

    def read_book(self, filename):
        # filename is expected as a string containing the
        # full path of the ebook file

        with open(filename, 'rb') as current_book:
            # This should speed up addition for larger files
            # without compromising the integrity of the process
            first_bytes = current_book.read(1024 *
                                            32)  # First 32KB of the file
            file_md5 = hashlib.md5(first_bytes).hexdigest()

        # Update the progress queue
        self.queue.put(filename)

        # This should not get triggered in reading mode
        # IF the file is NOT being loaded into the reader

        # Do not allow addition in case the file
        # is already in the database and it remains at its original path
        if self.work_mode == 'addition' and file_md5 in self.hashes_and_paths:
            if (self.hashes_and_paths[file_md5] == filename
                    or os.path.exists(self.hashes_and_paths[file_md5])):

                if not self.hashes_and_paths[file_md5] == filename:
                    print(
                        f'{os.path.basename(filename)} is already in database')
                return

        # This allows for eliminating issues with filenames that have
        # a dot in them. All hail the roundabout fix.
        valid_extension = False
        for i in sorter:
            if os.path.basename(filename).endswith(i):
                file_extension = i
                valid_extension = True
                break

        if not valid_extension:
            print(filename + ' has an unsupported extension')
            return

        book_ref = sorter[file_extension](filename, self.temp_dir, file_md5)

        # Everything following this is standard
        # None values are accounted for here
        is_valid = book_ref.read_book()
        if not is_valid:
            print('Cannot parse: ' + filename)
            return

        if book_ref.book:

            this_book = {}
            this_book[file_md5] = {'hash': file_md5, 'path': filename}

            # Different modes require different values
            if self.work_mode == 'addition':
                # Reduce the size of the incoming image
                # if one is found
                title = book_ref.get_title()
                author = book_ref.get_author()
                year = book_ref.get_year()
                isbn = book_ref.get_isbn()

                tags = None
                if self.auto_tags:
                    tags = book_ref.get_tags()

                cover_image_raw = book_ref.get_cover_image()
                if cover_image_raw:
                    cover_image = resize_image(cover_image_raw)
                else:
                    cover_image = None

                this_book[file_md5]['cover_image'] = cover_image
                this_book[file_md5]['addition_mode'] = self.addition_mode

            if self.work_mode == 'reading':
                all_content = book_ref.get_contents()

                # get_contents() returns a tuple. Index 1 is a collection of
                # special settings that depend on the kind of data being parsed.
                # Currently, this includes:
                # Only images included      images_only     BOOL    Book contains only images

                content = all_content[0]
                images_only = all_content[1]['images_only']

                if not content:
                    content = [('Invalid', 'Something went horribly wrong')]

                book_data = self.database_entry_for_book(file_md5)
                title = book_data[0]
                author = book_data[1]
                year = book_data[2]
                isbn = book_data[3]
                tags = book_data[4]
                position = book_data[5]
                bookmarks = book_data[6]
                cover = book_data[7]
                annotations = book_data[8]

                this_book[file_md5]['position'] = position
                this_book[file_md5]['bookmarks'] = bookmarks
                this_book[file_md5]['content'] = content
                this_book[file_md5]['images_only'] = images_only
                this_book[file_md5]['cover'] = cover
                this_book[file_md5]['annotations'] = annotations

            this_book[file_md5]['title'] = title
            this_book[file_md5]['author'] = author
            this_book[file_md5]['year'] = year
            this_book[file_md5]['isbn'] = isbn
            this_book[file_md5]['tags'] = tags

            return this_book

    def read_progress(self):
        while True:
            processed_file = self.queue.get()
            self.threading_completed.append(processed_file)

            total_number = len(self.file_list)
            completed_number = len(self.threading_completed)

            if progress_emitter:  # Skip update in reading mode
                progress_emitter.update_progress(completed_number * 100 //
                                                 total_number)

            if total_number == completed_number:
                break

    def initiate_threads(self):
        if not self.file_list:
            return None

        def pool_creator():
            _pool = Pool(5)
            self.processed_books = _pool.map(self.read_book, self.file_list)

            _pool.close()
            _pool.join()

        start_time = time.time()

        worker_thread = threading.Thread(target=pool_creator)
        progress_thread = threading.Thread(target=self.read_progress)
        worker_thread.start()
        progress_thread.start()

        worker_thread.join()
        progress_thread.join(timeout=.5)

        return_books = {}
        # Exclude None returns generated in case of duplication / parse errors
        self.processed_books = [i for i in self.processed_books if i]
        for i in self.processed_books:
            for j in i:
                return_books[j] = i[j]

        del self.processed_books
        print('Finished processing in', time.time() - start_time)
        return return_books
Example #25
                d['song_id'] = dict['song_id']
                db.save_one_data_to_comment(d)  # store the latest comments
            queue.task_done()  # mark this item as taken from the queue
        except:
            print("queue is empty wait for a while")
            time.sleep(1)


if __name__ == '__main__':
    # start_time = time.time()
    db = DbHelper()
    db.connenct(const.DB_CONFIGS)

    put_thread_pool = ThreadPool(3)
    get_thread_pool = ThreadPool(3)
    queue = ThreadManager().Queue()  # communication between thread pools requires Manager().Queue(); ordinary threads can use Queue()

    comment_url = 'https://music.163.com/weapi/v1/resource/comments/R_SO_4_{}?csrf_token='  # comments API endpoint
    random_str = create_random_str(16)
    params = get_params(const.FIRST_PARAM, const.FORTH_PARAM, random_str)
    encSecKey = get_encSecKey(random_str, const.SECOND_PARAM,
                              const.THIRD_PARAM)
    form_data = {'params': params, 'encSecKey': encSecKey}

    response = get_html(const.DAY_LIST_URL)
    links = []
    song_ids = []
    # First store the details of the hundred songs on the Cloud Music Surge chart in the database
    for data in get_day_hot_song(response):
        db.save_one_data_to_day_hot_song(data)
        song_id = data['song_id']
Example #26
                    format(process_number, repr(e)))
                sleep(3)

    return my_func


@error_catching
def f(process_number):
    print("starting worker:", process_number)
    while True:
        sleep(2)
        print("Worker {} checks in.".format(process_number))


if __name__ == '__main__':
    processes = []
    manager = Manager()

    for i in range(3):
        p = Process(target=f, args=(i,))
        p.daemon = True
        p.start()
        processes.append(p)

    try:
        while True:
            sleep(1)
    except KeyboardInterrupt:
        print("Keyboard interrupt in main")
        sys.exit()
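
The beginning of the error_catching decorator is cut off in this example; only its tail (the format call, sleep(3) and return my_func) survives. Purely as an assumption, a decorator matching that tail might look like:

import functools
from time import sleep


def error_catching(func):
    # Hypothetical reconstruction: rerun the worker forever, logging any
    # exception it raises before retrying.
    @functools.wraps(func)
    def my_func(process_number):
        while True:
            try:
                func(process_number)
            except Exception as e:
                print("Exception in worker {}: {}".format(process_number, repr(e)))
                sleep(3)
    return my_func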
Example #27
import os
import unittest
from multiprocessing.dummy import Manager

from main import Uploader

q = Manager().Queue()
files_list = [
    os.path.join(os.getcwd(), "test_data", i)
    for i in os.listdir("./test_data")
]


class Test(unittest.TestCase):
    def test_1(self):
        self.assertIsInstance(Uploader(files_list, 2, q), Uploader)

    def test_2(self):
        self.assertIsInstance(Uploader("", 2, q), Uploader)

    def test_3(self):
        self.assertIsInstance(Uploader(files_list, "2", q), Uploader)

    def test_4(self):
        self.assertIsInstance(Uploader(files_list, 2, "q"), Uploader)


if __name__ == '__main__':
    unittest.main()
Example #28
class BookSorter:
    def __init__(self, file_list, mode, database_path, settings, temp_dir=None):
        # Have the GUI pass a list of files straight to here
        # Then, on the basis of what is needed, pass the
        # filenames to the requisite functions
        # This includes getting file info for the database
        # Parsing for the reader proper
        # Caching upon closing
        self.file_list = [i for i in file_list if os.path.exists(i)]
        self.statistics = [0, (len(file_list))]
        self.hashes_and_paths = {}
        self.work_mode = mode[0]
        self.addition_mode = mode[1]
        self.database_path = database_path
        self.auto_tags = settings['auto_tags']
        self.auto_cover = settings['auto_cover']
        self.temp_dir = temp_dir
        if database_path:
            self.database_hashes()

        self.threading_completed = []
        self.queue = Manager().Queue()
        self.errors = Manager().list()
        self.processed_books = []

        if self.work_mode == 'addition':
            progress_object_generator()

    def database_hashes(self):
        all_hashes_and_paths = database.DatabaseFunctions(
            self.database_path).fetch_data(
                ('Hash', 'Path'),
                'books',
                {'Hash': ''},
                'LIKE')

        if all_hashes_and_paths:
            self.hashes_and_paths = {
                i[0]: i[1] for i in all_hashes_and_paths}

    def database_entry_for_book(self, file_hash):
        database_return = database.DatabaseFunctions(
            self.database_path).fetch_data(
                ('Title', 'Author', 'Year', 'ISBN', 'Tags',
                 'Position', 'Bookmarks', 'CoverImage', 'Annotations'),
                'books',
                {'Hash': file_hash},
                'EQUALS')[0]

        book_data = []

        for count, i in enumerate(database_return):
            if count in (5, 6, 8):  # Position, Bookmarks, and Annotations are pickled
                if i:
                    book_data.append(pickle.loads(i))
                else:
                    book_data.append(None)
            else:
                book_data.append(i)

        return book_data

    def read_book(self, filename):
        # filename is expected as a string containing the
        # full path of the ebook file

        with open(filename, 'rb') as current_book:
            # This should speed up addition for larger files
            # without compromising the integrity of the process
            first_bytes = current_book.read(1024 * 32)  # First 32KB of the file
            file_md5 = hashlib.md5(first_bytes).hexdigest()

        # Update the progress queue
        self.queue.put(filename)

        # This should not get triggered in reading mode
        # IF the file is NOT being loaded into the reader

        # Do not allow addition in case the file
        # is already in the database and it remains at its original path
        if self.work_mode == 'addition' and file_md5 in self.hashes_and_paths:
            if (self.hashes_and_paths[file_md5] == filename
                    or os.path.exists(self.hashes_and_paths[file_md5])):

                if not self.hashes_and_paths[file_md5] == filename:
                    warning_string = (
                        f'{os.path.basename(filename)} is already in database')
                    logger.warning(warning_string)
                return

        # This allows for eliminating issues with filenames that have
        # a dot in them. All hail the roundabout fix.
        valid_extension = False
        for i in sorter:
            if os.path.basename(filename).endswith(i):
                file_extension = i
                valid_extension = True
                break

        if not valid_extension:
            this_error = 'Unsupported extension: ' + filename
            self.errors.append(this_error)
            logger.error(this_error)
            return

        book_ref = sorter[file_extension](filename, self.temp_dir, file_md5)

        # None of the following have an exception type specified
        # This will keep everything from crashing, but will make
        # troubleshooting difficult
        # TODO
        # In application notifications

        try:
            book_ref.read_book()
        except Exception as e:
            this_error = f'Error initializing: {filename}'
            self.errors.append(this_error)
            logger.exception(this_error + f' {type(e).__name__} Arguments: {e.args}')
            return

        this_book = {}
        this_book[file_md5] = {
            'hash': file_md5,
            'path': filename}

        # Different modes require different values
        if self.work_mode == 'addition':
            try:
                metadata = book_ref.generate_metadata()
            except Exception as e:
                this_error = f'Metadata generation error: {filename}'
                self.errors.append(this_error)
                logger.exception(this_error + f' {type(e).__name__} Arguments: {e.args}')
                return

            title = metadata.title
            author = metadata.author
            year = metadata.year
            isbn = metadata.isbn

            tags = None
            if self.auto_tags:
                tags = metadata.tags

            cover_image_raw = metadata.cover
            if cover_image_raw:
                cover_image = resize_image(cover_image_raw)
            else:
                cover_image = None
                if self.auto_cover:
                    cover_image = fetch_cover(title, author)

            this_book[file_md5]['cover_image'] = cover_image
            this_book[file_md5]['addition_mode'] = self.addition_mode

        if self.work_mode == 'reading':
            try:
                book_breakdown = book_ref.generate_content()
            except Exception as e:
                this_error = f'Content generation error: {filename}'
                self.errors.append(this_error)
                logger.exception(this_error + f' {type(e).__name__} Arguments: {e.args}')
                return

            toc = book_breakdown[0]
            content = book_breakdown[1]
            images_only = book_breakdown[2]

            try:
                book_data = self.database_entry_for_book(file_md5)
            except TypeError:
                logger.error(
                    f'Database error: {filename}. Re-add book to program')
                return

            title = book_data[0].replace('&', '&&')
            author = book_data[1]
            year = book_data[2]
            isbn = book_data[3]
            tags = book_data[4]
            position = book_data[5]
            bookmarks = book_data[6]
            cover = book_data[7]
            annotations = book_data[8]

            this_book[file_md5]['position'] = position
            this_book[file_md5]['bookmarks'] = bookmarks
            this_book[file_md5]['toc'] = toc
            this_book[file_md5]['content'] = content
            this_book[file_md5]['images_only'] = images_only
            this_book[file_md5]['cover'] = cover
            this_book[file_md5]['annotations'] = annotations

        this_book[file_md5]['title'] = title
        this_book[file_md5]['author'] = author
        this_book[file_md5]['year'] = year
        this_book[file_md5]['isbn'] = isbn
        this_book[file_md5]['tags'] = tags

        return this_book

    def read_progress(self):
        while True:
            processed_file = self.queue.get()
            self.threading_completed.append(processed_file)

            total_number = len(self.file_list)
            completed_number = len(self.threading_completed)

            # Just for the record, this slows down book searching by about 20%
            if _progress_emitter:  # Skip update in reading mode
                _progress_emitter.update_progress(
                    completed_number * 100 // total_number)

            if total_number == completed_number:
                break

    def initiate_threads(self):
        if not self.file_list:
            return None

        def pool_creator():
            _pool = Pool(thread_count)
            self.processed_books = _pool.map(
                self.read_book, self.file_list)

            _pool.close()
            _pool.join()

        start_time = time.time()

        worker_thread = threading.Thread(target=pool_creator)
        progress_thread = threading.Thread(target=self.read_progress)
        worker_thread.start()
        progress_thread.start()

        worker_thread.join()
        progress_thread.join(timeout=.5)

        return_books = {}
        # Exclude None returns generated in case of duplication / parse errors
        self.processed_books = [i for i in self.processed_books if i]
        for i in self.processed_books:
            for j in i:
                return_books[j] = i[j]

        del self.processed_books
        processing_time = str(time.time() - start_time)
        logger.info('Finished processing in ' + processing_time)

        return return_books, self.errors