Example #1
def upload():
    alphabet = string.digits + string.ascii_letters + '$%. ♠♥♦♣'  # '$%. '
    recognizer_alphabet = ''.join(sorted(set(alphabet.lower())))
    blank_label_idx = len(recognizer_alphabet)
    file = request.files['file']
    if file and allowed_file(file.filename):

        filename = file.filename
        #save_filename = filename.split('.')[0]+'.jpg'
        # save received image
        img_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)

        file.save(img_path)

        #img = Image.open(img_path)
        img = tools.read(img_path)
        img = utils.compute_input(img)

        img = np.expand_dims(img, 0)
        # Call the TensorFlow Serving API for prediction

        masks_async = detects_calculation.delay(img.tolist())
        predictions = masks_async.get()

        bboxes = utils.getBoxes([
            np.array(predictions[0]),
        ],
                                detection_threshold=0.7,
                                text_threshold=0.4,
                                link_threshold=0.4,
                                size_threshold=10)

        #img_for_recognition = tools.read(img_path)  # different read algorithm?
        img = tools.read(img_path)

        recognized_res = recognition_task.delay(img.tolist(),
                                                bboxes[0].tolist())
        res = recognized_res.get()

        res = [
            ''.join([
                recognizer_alphabet[idx] for idx in row
                if idx not in [blank_label_idx, -1]
            ]) for row in res
        ]
        prediction_groups = [
            list(zip(predictions, boxes))
            for predictions, boxes in zip([res], bboxes)
        ]
        fig, axs = plt.subplots(nrows=1, figsize=(10, 10))
        tools.drawAnnotations(img, predictions=prediction_groups[0], ax=axs)
        fig.savefig(
            os.path.join(app.config['UPLOAD_FOLDER'], 'ocr_' + filename))
        return send_from_directory(app.config['UPLOAD_FOLDER'],
                                   'ocr_' + filename)
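For context, a minimal sketch of the scaffolding this handler assumes; the Flask app, upload folder, and allowed_file helper are not part of the example, and the detects_calculation and recognition_task tasks (Celery-style .delay/.get calls) are defined elsewhere in the project:

import os
import string

import numpy as np
import matplotlib.pyplot as plt
from flask import Flask, request, send_from_directory

app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = '/tmp/uploads'  # assumed location

ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg'}

def allowed_file(filename):
    # assumed implementation of the helper used above:
    # accept only filenames with an allowed image extension
    return '.' in filename and \
        filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS

# the view would then be registered on the app, for example:
# app.add_url_rule('/upload', 'upload', upload, methods=['POST'])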
Example #2
def main():
    try: input = raw_input
    except NameError: pass
    print('Block request http://www.douyu.com/lapi/live/getPlay/(roomid)')
    print('Enter getPlay request query:')
    req=input()
    if req:
        rtmp_url=getRtmpUrl(req)
        print('### rtmp_url: ',rtmp_url)
        tmpl=tools.read(conf.manual_tmpl_path).replace('<rtmp_url>',rtmp_url)
    else:
        tmpl=tools.read(conf.now_tmpl_path)
    tools.write(conf.douyutv_plug_path,tmpl)
    print('\n\n copy douyutv.py plug ok')
    print('streamlink http://www.douyutv.com/cold medium -o ')
Example #3
def create_bboxes_array(images, box_groups, **kwargs) -> typing.Union[np.ndarray, typing.List[list]]:
    """Build a batch array of crops from images using lists of bounding boxes.
    Args:
        images: A list of input images, supplied as filepaths or numpy arrays
            with shape (H, W, 3).
        box_groups: A list of groups of boxes, one for each image
    """
    assert len(box_groups) == len(images), \
        'You must provide the same number of box groups as images.'
    crops = []
    start_end = []
    for image_path, boxes in zip(images, box_groups):
        image = tools.read(image_path)
        # if self.prediction_model.input_shape[-1] == 1 and image.shape[-1] == 3:
        # Convert color to grayscale
        # image = cv2.cvtColor(image, code=cv2.COLOR_RGB2GRAY)
        for box in boxes:
            crops.append(
                tools.warpBox(image=image,
                              box=box,
                              target_height=31,
                              target_width=200))
        start = 0 if not start_end else start_end[-1][1]
        start_end.append((start, start + len(boxes)))
    if not crops:
        return [[] for image in images]
    X = np.float32(crops) / 255
    if len(X.shape) == 3:
        X = X[..., np.newaxis]

    return X
Example #4
    def recognize_from_boxes(self, images, box_groups, **kwargs) -> typing.List[str]:
        """Recognize text from images using lists of bounding boxes.

        Args:
            images: A list of input images, supplied as numpy arrays with shape
                (H, W, 3).
            box_groups: A list of groups of boxes, one for each image
        """
        assert len(box_groups) == len(images), \
            'You must provide the same number of box groups as images.'
        crops = []
        start_end = []
        for image, boxes in zip(images, box_groups):
            image = tools.read(image)
            if self.prediction_model.input_shape[-1] == 1 and image.shape[-1] == 3:
                # Convert color to grayscale
                image = cv2.cvtColor(image, code=cv2.COLOR_RGB2GRAY)
            for box in boxes:
                crops.append(
                    tools.warpBox(image=image,
                                  box=box,
                                  target_height=self.model.input_shape[1],
                                  target_width=self.model.input_shape[2]))
            start = 0 if not start_end else start_end[-1][1]
            start_end.append((start, start + len(boxes)))
        if not crops:
            return [[] for image in images]
        X = np.float32(crops) / 255
        if len(X.shape) == 3:
            X = X[..., np.newaxis]
        predictions = [
            ''.join([self.alphabet[idx] for idx in row if idx not in [self.blank_label_idx, -1]])
            for row in self.prediction_model.predict(X, **kwargs)
        ]
        return [predictions[start:end] for start, end in start_end]
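A hedged usage sketch, assuming recognizer is an instance of the class this method belongs to and that 'sample.jpg' stands in for a real image path:

import numpy as np

# one group of 4x2 corner-point boxes per image
box_groups = [[np.array([[10, 10], [120, 10], [120, 40], [10, 40]], dtype='float32')]]
texts = recognizer.recognize_from_boxes(images=['sample.jpg'], box_groups=box_groups)
# texts[0] is the list of strings recognized for the first image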
Example #5
    def detectChars(self,
                    images: typing.List[typing.Union[np.ndarray, str]],
                    detection_threshold=0.7,
                    thickness=3,
                    text_threshold=0.4,
                    link_threshold=0.4,
                    size_threshold=10,
                    tolerance=0.1,
                    **kwargs):
        """Recognize the Chars in a set of images.

        Args:
            images: Can be a list of numpy arrays of shape HxWx3 or a list of
                filepaths.
            link_threshold: This is the same as `text_threshold`, but is applied to the
                link map instead of the text map.
            detection_threshold: We want to avoid including boxes that may have
                represented large regions of low confidence text predictions. To do this,
                we do a final check for each word box to make sure the maximum confidence
                value exceeds some detection threshold. This is the threshold used for
                this check.
            text_threshold: When the text map is processed, it is converted from confidence
                (float from zero to one) values to classification (0 for not text, 1 for
                text) using binary thresholding. The threshold value determines the
                breakpoint at which a value is converted to a 1 or a 0. For example, if
                the threshold is 0.4 and a value for particular point on the text map is
                0.5, that value gets converted to a 1. The higher this value is, the less
                likely it is that characters will be merged together into a single word.
                The lower this value is, the more likely it is that non-text will be detected.
                Therein lies the balance.
            size_threshold: The minimum area for a word.
        """
        original_images = [(tools.read(image)) for image in images]
        images = [compute_input(image) for image in original_images]
        results = self.model.predict(np.array(images), **kwargs)
        boxes = getBoxes(results,
                         detection_threshold=detection_threshold,
                         text_threshold=text_threshold,
                         link_threshold=link_threshold,
                         size_threshold=size_threshold)
        resized_results = [
            (cv2.resize(i[..., 0], (images[cnt].shape[1], images[cnt].shape[0])) * 255).astype(np.uint8)
            for cnt, i in enumerate(results)
        ]
        all_transformed_chars, all_images = [], []
        for image, resized_result, box in zip(original_images, resized_results,
                                              boxes):
            transformed_chars, transformed_coors = [], []
            for words in box:
                transformed_char, transformed_coor = tools.warpChars(
                    image, resized_result, words, tolerance)
                transformed_coors.extend(transformed_coor)
                transformed_chars.extend(transformed_char)
            transformed_coors = np.array(transformed_coors).astype(np.int32)
            new_img = tools.drawBoxes(image,
                                      transformed_coors,
                                      thickness=thickness)
            all_images.append(new_img)
            all_transformed_chars.append(transformed_chars)
        return all_images, all_transformed_chars, transformed_coors
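A usage sketch under the assumption that detectChars is a method of a detector object holding a trained model; the image path is a placeholder:

annotated_images, char_crops, char_coords = detector.detectChars(
    ['sample.jpg'], detection_threshold=0.7, tolerance=0.1)
# annotated_images: copies of the inputs with character boxes drawn on them
# char_crops: per-image lists of warped character crops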
Example #6
def test_read_string_ven_changeunits():
    '''Test reading data from an existing ven file and changing to English units.'''

    df = tools.read('tests/tabs_V_ven', units='E')
    # test column names
    dfcolumns = 'East [kts]\tNorth [kts]\tDir [deg T]\tWaterT [deg F]\tTx\tTy\tSpeed [kts]\tAcross [kts]\tAlong [kts]'
    assert '\t'.join(df.columns.values) == dfcolumns
    # test values in final line
    dftail1 = np.array([[0.04, 0.46, 170, 75.2, 0.0, -2.0, 0.46, -0.45, 0.1]])
    assert np.allclose(dftail1, df.tail(1).values)
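The tests in this group all go through the same tools.read entry point; a compact sketch of the call patterns they rely on (the paths and query are the ones used in the tests):

df = tools.read('tests/tabs_V_ven')              # read a data file, metric units
df = tools.read('tests/tabs_V_ven', units='E')   # same file, converted to English units
engine = tools.engine()
df = tools.read(['select * from tabs_V_ven limit 0,1', engine])  # read from the MySQL database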
Example #7
def test_read_string_met():
    '''Test reading data from an existing met file.'''

    df = tools.read('tests/tabs_V_met')
    # test column names
    dfcolumns = 'East [m/s]\tNorth [m/s]\tAirT [deg C]\tAtmPr [MB]\tGust [m/s]\tComp [deg M]\tTx\tTy\tPAR \tRelH [%]\tSpeed [m/s]\tDir from [deg T]'
    assert '\t'.join(df.columns.values) == dfcolumns
    # test values in final line
    dftail1 = np.array([[-4.29, -0.45, 15.70, 1015.74, 6.34, 169.50, 0, 0, 0.00, 108.20, 4.31, 84.00]])
    assert np.allclose(dftail1, df.tail(1).values)
Example #8
def test_read_string_wave():
    '''Test reading data from an existing wave file.'''

    df = tools.read('tests/tabs_V_wave')
    # test column names
    dfcolumns = 'WaveHeight [m]\tMeanPeriod [s]\tPeakPeriod [s]'
    assert '\t'.join(df.columns.values) == dfcolumns
    # test values in final line
    dftail1 = np.array([[0.86, 4.26, 5.13]])
    assert np.allclose(dftail1, df.tail(1).values)
Example #9
def test_read_string_wave_changeunits():
    '''Test reading data from an existing wave file and changing to English units.'''

    df = tools.read('tests/tabs_V_wave', units='E')
    # test column names
    dfcolumns = 'WaveHeight [ft]\tMeanPeriod [s]\tPeakPeriod [s]'
    assert '\t'.join(df.columns.values) == dfcolumns
    # test values in final line
    dftail1 = np.array([[2.8, 4.26, 5.13]])
    assert np.allclose(dftail1, df.tail(1).values)
Example #10
def test_read_string_ven():
    '''Test reading data from an existing ven file.'''

    df = tools.read('tests/tabs_V_ven')
    # test column names
    dfcolumns = 'East [cm/s]\tNorth [cm/s]\tDir [deg T]\tWaterT [deg C]\tTx\tTy\tSpeed [cm/s]\tAcross [cm/s]\tAlong [cm/s]'
    assert '\t'.join(df.columns.values) == dfcolumns
    # test values in final line
    dftail1 = np.array([[2.06, 23.59, 170., 24.02, 0., -2., 23.68, -23.16, 4.92]])
    assert np.allclose(dftail1, df.tail(1).values)
Example #11
def test_read_string_met_changeunits():
    '''Test reading data from an existing met file and changing to English units.'''

    df = tools.read('tests/tabs_V_met', units='E')
    # test column names
    dfcolumns = 'East [kts]\tNorth [kts]\tAirT [deg F]\tAtmPr [inHg]\tGust [kts]\tComp [deg M]\tTx\tTy\tPAR \tRelH [%]\tSpeed [kts]\tDir from [deg T]'
    assert '\t'.join(df.columns.values) == dfcolumns
    # test values in final line
    dftail1 = np.array([[-8.34, -0.87, 60.3, 29.99, 12.32, 169.5, 0., 0., 0., 108.2, 8.38, 84.]])
    assert np.allclose(dftail1, df.tail(1).values)
Example #12
def test_read_mysql_met():
    '''Test reading 1st line of met data buoy V from mysql database.'''

    engine = tools.engine()
    query = 'select * from tabs_V_met limit 0,1'
    df = tools.read([query, engine])
    # test column names
    dfcolumns = 'East [m/s]\tNorth [m/s]\tAirT [deg C]\tAtmPr [MB]\tGust [m/s]\tComp [deg M]\tTx\tTy\tPAR \tRelH [%]\tSpeed [m/s]\tDir from [deg T]'
    assert '\t'.join(df.columns.values) == dfcolumns
    # test values in final line
    dftail1 = np.array([[-1.45, 1.07, 24.4, 1020.14, 8.33, 21.3, 0., -3., 0., 91.8, 1.8, 126.]])
    assert np.allclose(dftail1, df.tail(1).values)
Example #13
def test_read_mysql_ven():
    '''Test reading 1st line of ven data buoy V from mysql database.'''

    engine = tools.engine()
    query = 'select * from tabs_V_ven limit 0,1'
    df = tools.read([query, engine])
    # test column names
    dfcolumns = 'East [cm/s]\tNorth [cm/s]\tDir [deg T]\tWaterT [deg C]\tTx\tTy\tSpeed [cm/s]\tAcross [cm/s]\tAlong [cm/s]'
    assert '\t'.join(df.columns.values) == dfcolumns
    # test values in final line
    dftail1 = np.array([[15.11, -3.58, 256., 23.54, 0., -1., 15.53, 5.39, 14.56]])
    assert np.allclose(dftail1, df.tail(1).values)
Example #14
def test_read_mysql_met_changeunits():
    '''Test reading 1st line of met data buoy V from mysql database and changing to English units.'''

    engine = tools.engine()
    query = 'select * from tabs_V_met limit 0,1'
    df = tools.read([query, engine], units='E')
    # test column names
    dfcolumns = 'East [kts]\tNorth [kts]\tAirT [deg F]\tAtmPr [inHg]\tGust [kts]\tComp [deg M]\tTx\tTy\tPAR \tRelH [%]\tSpeed [kts]\tDir from [deg T]'
    assert '\t'.join(df.columns.values) == dfcolumns
    # test values in final line
    dftail1 = np.array([[-2.82, 2.08, 75.9, 30.12, 16.19, 21.3, 0., -3., 0., 91.8, 3.5, 126.]])
    assert np.allclose(dftail1, df.tail(1).values)
Example #15
def test_read_mysql_eng():
    '''Test reading 1st line of eng data buoy V from mysql database.'''

    engine = tools.engine()
    query = 'select * from tabs_V_eng limit 0,1'
    df = tools.read([query, engine])
    # test column names
    dfcolumns = 'VBatt [Oper]\tSigStr [dB]\tComp [deg M]\tNping\tTx\tTy\tADCP Volt\tADCP Curr\tVBatt [sleep]'
    assert '\t'.join(df.columns.values) == dfcolumns
    # test values in final line
    dftail1 = np.array([[13.9, -3.69, 256., 121., 0., -1., 29.94, 6.45, 13.9]])
    assert np.allclose(dftail1, df.tail(1).values)
Example #16
def test_read_mysql_wave_changeunits():
    '''Test reading 1st line of wave data buoy V from mysql database and changing to English units.'''

    engine = tools.engine()
    query = 'select * from tabs_V_wave limit 0,1'
    df = tools.read([query, engine], units='E')
    # test column names
    dfcolumns = 'WaveHeight [ft]\tMeanPeriod [s]\tPeakPeriod [s]'
    assert '\t'.join(df.columns.values) == dfcolumns
    # test values in final line
    dftail1 = np.array([[3.8,  5.0539,  6.6667]])
    assert np.allclose(dftail1, df.tail(1).values)
Example #17
def test_read_mysql_salt():
    '''Test reading 1st line of salt data buoy V from mysql database.'''

    engine = tools.engine()
    query = 'select * from tabs_V_salt limit 0,1'
    df = tools.read([query, engine])
    # test column names
    dfcolumns = 'Temp [deg C]\tCond [ms/cm]\tSalinity\tDensity [kg/m^3]\tSoundVel [m/s]'
    assert '\t'.join(df.columns.values) == dfcolumns
    # test values in final line
    dftail1 = np.array([[32.52, -0.01, 0.01, 99.9999, 99.9999]])
    assert np.allclose(dftail1, df.tail(1).values)
Example #18
def test_read_mysql_ven_changeunits():
    '''Test reading 1st line of ven data buoy V from mysql database and change
    to English units.'''

    engine = tools.engine()
    query = 'select * from tabs_V_ven limit 0,1'
    df = tools.read([query, engine], units='E')
    # test column names
    dfcolumns = 'East [kts]\tNorth [kts]\tDir [deg T]\tWaterT [deg F]\tTx\tTy\tSpeed [kts]\tAcross [kts]\tAlong [kts]'
    assert '\t'.join(df.columns.values) == dfcolumns
    # test values in final line
    dftail1 = np.array([[0.29, -0.07, 256, 74.4, 0.0, -1.0, 0.3, 0.1, 0.28]])
    assert np.allclose(dftail1, df.tail(1).values)
Example #19
def get_icdar_2013_detector_dataset(labels, skip_illegible=False):
    """
    DEPRECATED
    Get the ICDAR 2013 text segmentation dataset for detector
    training. Only the training set has the necessary annotations.
    For the test set, only segmentation maps are provided, which
    do not provide the necessary information for affinity scores.

    Args:
        labels: A list of (image_path, gt_filepath) tuples.
        skip_illegible: Whether to skip illegible characters.

    Yields:
        (image, character_bboxes, characters, confidence_mask, confidences)
        tuples. Confidence is always 1 for this dataset. We record confidence
        to allow for future support for weakly supervised cases.
    """

    for index in itertools.cycle(range(len(labels))):
        print(labels[index])
        image_path, gt_filepath = labels[index]
        image = tools.read(image_path)

        lines = []
        character_bboxes = []
        characters = []
        confidences = []
        with open(gt_filepath, 'r') as f:
            current_line = []
            current_bbox = []
            for row in f.read().split('\n'):
                if row == '':
                    lines.append(current_line)
                    current_line = []
                else:
                    row = row.split(' ')[5:]
                    character = row[-1][1:-1]
                    if character == '' and skip_illegible:
                        continue
                    x1, y1, x2, y2 = map(int, row[:4])
                    current_bbox.append([[x1, y1], [x2, y1], [x2, y2],
                                         [x1, y2]])
                    characters.append(character)
        # Some lines only have illegible characters and if skip_illegible is True,
        # then these lines will be blank.
        character_bboxes.append(np.array(current_bbox))
        confidences.append(1.0)

        yield image, character_bboxes, characters, np.ones(
            (image.shape[0], image.shape[1]), np.float32), confidences
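Because this is a generator, a caller pulls one sample at a time; a minimal sketch, assuming labels is a list of (image_path, gt_filepath) pairs as unpacked above:

gen = get_icdar_2013_detector_dataset(labels)
image, character_bboxes, characters, mask, confidences = next(gen)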
Example #20
def test_present():
    '''Test functionality of present.

    http://stackoverflow.com/questions/16571150/how-to-capture-stdout-output-from-a-python-function-call
    '''

    df = tools.read('tests/tabs_V_ven')
    f = io.StringIO()
    with redirect_stdout(f):
        tools.present(df)
    out = f.getvalue()

    assert out  # make sure not empty
    assert isinstance(out, str)
Example #21
def test_make_text():
    '''Test file writing.'''

    df = tools.read('tests/tabs_V_ven')
    fname = 'tests/write_tabs_V_ven'
    run_daily.make_text(df, fname)

    assert open(fname).readlines() == ['Dates [UTC]\tEast [cm/s]\tNorth [cm/s]\tDir [deg T]\tWaterT [deg C]\tTx\tTy\tSpeed [cm/s]\tAcross [cm/s]\tAlong [cm/s]\n',
                                        '2017-01-05 00:00:00\t-2.50\t28.27\t139.00\t24.03\t0\t-1\t28.38\t-28.36\t0.96\n',
                                        '2017-01-05 00:30:00\t0.69\t25.37\t144.00\t24.03\t0\t-1\t25.38\t-25.10\t3.78\n',
                                        '2017-01-05 01:00:00\t1.06\t24.70\t161.00\t24.02\t0\t-1\t24.72\t-24.39\t4.06\n',
                                        '2017-01-05 01:30:00\t2.06\t23.59\t170.00\t24.02\t0\t-2\t23.68\t-23.16\t4.92\n']

    # remove file after checking
    remove(fname)
Example #22
    def recognize(self,
                  images,
                  detection_kwargs=None,
                  recognition_kwargs=None):
        """Run the pipeline on one or multiples images.

        Args:
            images: The images to parse (can be a list of actual images or a list of filepaths)
            detection_kwargs: Arguments to pass to the detector call
            recognition_kwargs: Arguments to pass to the recognizer call

        Returns:
            A list of lists of (text, box) tuples.
        """

        # Make sure we have an image array to start with.
        if not isinstance(images, np.ndarray):
            images = [tools.read(image) for image in images]
        # This turns images into (image, scale) tuples temporarily
        images = [
            tools.resize_image(image,
                               max_scale=self.scale,
                               max_size=self.max_size) for image in images
        ]
        max_height, max_width = np.array(
            [image.shape[:2] for image, scale in images]).max(axis=0)
        scales = [scale for _, scale in images]
        images = np.array([
            tools.pad(image, width=max_width, height=max_height)
            for image, _ in images
        ])
        if detection_kwargs is None:
            detection_kwargs = {}
        if recognition_kwargs is None:
            recognition_kwargs = {}
        box_groups = self.detector.detect(images=images, **detection_kwargs)
        prediction_groups = self.recognizer.recognize_from_boxes(
            images=images, box_groups=box_groups, **recognition_kwargs)
        box_groups = [
            tools.adjust_boxes(
                boxes=boxes, boxes_format='boxes', scale=1 /
                scale) if scale != 1 else boxes
            for boxes, scale in zip(box_groups, scales)
        ]
        return [
            list(zip(predictions, boxes))
            for predictions, boxes in zip(prediction_groups, box_groups)
        ]
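A usage sketch, assuming pipeline is an object wired with the detector and recognizer this method expects; the image paths are placeholders:

predictions = pipeline.recognize(images=['scan-1.jpg', 'scan-2.jpg'])
for text, box in predictions[0]:
    print(text, box)  # recognized string and its 4x2 box in original image coordinates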
Example #23
def get_detector_image_generator(labels,
                                 width,
                                 height,
                                 augmenter=None,
                                 area_threshold=0.5):
    """Generated augmented (image, lines) tuples from a list
    of (filepath, lines, confidence) tuples. Confidence is
    not used right now but is included for a future release
    that uses semi-supervised data.

    Args:
        labels: A list of (image, lines, confidence) tuples.
        augmenter: An augmenter to apply to the images.
        width: The width to use for output images
        height: The height to use for output images
        area_threshold: The area threshold to use to keep
            characters in augmented images.
    """
    labels = labels.copy()
    for index in itertools.cycle(range(len(labels))):
        if index == 0:
            random.shuffle(labels)
        image_filepath, lines, confidence = labels[index]
        image = tools.read(image_filepath)
        if augmenter is not None:
            image, lines = tools.augment(boxes=lines,
                                         boxes_format='lines',
                                         image=image,
                                         area_threshold=area_threshold,
                                         augmenter=augmenter)
        image, scale = tools.fit(image,
                                 width=width,
                                 height=height,
                                 mode='letterbox',
                                 return_scale=True)
        lines = tools.adjust_boxes(boxes=lines,
                                   boxes_format='lines',
                                   scale=scale)

        bboxes = [line[0] for line in lines]
        words = [line[1] for line in lines]
        words = ''.join(words)

        yield image[np.newaxis, ...], np.array(bboxes)[np.newaxis, ...],\
              np.array(words)[np.newaxis, ... ], np.ones((image.shape[0], image.shape[1]), np.float32)[np.newaxis, ...],\
              np.ones(len(words), np.float32)[np.newaxis, ...]
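A minimal way this generator might be consumed, assuming labels already holds (filepath, lines, confidence) tuples in the format the docstring describes:

gen = get_detector_image_generator(labels, width=640, height=640)
image, bboxes, words, mask, confidences = next(gen)  # each batched with a leading axis of length 1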
Example #24
def run_continue(filename, generations, save_interval):
    """Run a set number of tournaments, saving along the way every save_interval generations."""
    start = time.time()
    mga = read(filename)
    for g in range(int(
            generations /
            save_interval)):  # Run X generations and save every Y generations
        print('Running generations %i - %i...' % (g * save_interval,
                                                  (g + 1) * save_interval))
        # Run simulation
        mga.runTournaments(save_interval * mga.popsize, report=True)
        # Save data
        generation = int(mga.generationsRun)
        date = mga.dateEdited
        filename = '%s_G%i_%s' % (filename[:-14], generation, date)
        save(filename, mga)
        print('%f sec elapsed so far \n' % (time.time() - start))
Example #25
def test_make_text():
    '''Test file writing.'''

    df = tools.read('tests/tabs_V_ven')
    fname = 'tests/write_tabs_V_ven'
    run_daily.make_text(df, fname)

    assert open(fname).readlines() == [
        'Dates [UTC]\tEast [cm/s]\tNorth [cm/s]\tDir [deg T]\tWaterT [deg C]\tTx\tTy\tSpeed [cm/s]\tAcross [cm/s]\tAlong [cm/s]\n',
        '2017-01-05 00:00:00\t-2.50\t28.27\t139.00\t24.03\t0\t-1\t28.38\t-28.36\t0.96\n',
        '2017-01-05 00:30:00\t0.69\t25.37\t144.00\t24.03\t0\t-1\t25.38\t-25.10\t3.78\n',
        '2017-01-05 01:00:00\t1.06\t24.70\t161.00\t24.02\t0\t-1\t24.72\t-24.39\t4.06\n',
        '2017-01-05 01:30:00\t2.06\t23.59\t170.00\t24.02\t0\t-2\t23.68\t-23.16\t4.92\n'
    ]

    # remove file after checking
    remove(fname)
Example #26
def get_recognizer_image_generator(labels,
                                   height,
                                   width,
                                   alphabet,
                                   augmenter=None):
    """Generate augmented (image, text) tuples from a list
    of (filepath, box, label) tuples.

    Args:
        labels: A list of (filepath, box, label) tuples
        height: The height of the images to return
        width: The width of the images to return
        alphabet: The alphabet which limits the characters returned
        augmenter: The augmenter to apply to images
    """
    n_with_illegal_characters = sum(
        any(c not in alphabet for c in text) for _, _, text in labels)
    if n_with_illegal_characters > 0:
        print(
            f'{n_with_illegal_characters} / {len(labels)} instances have illegal characters.'
        )
    labels = labels.copy()
    for index in itertools.cycle(range(len(labels))):
        if index == 0:
            random.shuffle(labels)
        filepath, box, text = labels[index]
        cval = np.random.randint(low=0, high=255, size=3).astype('uint8')
        if box is not None:
            image = tools.warpBox(image=tools.read(filepath),
                                  box=box.astype('float32'),
                                  target_height=height,
                                  target_width=width,
                                  cval=cval)
        else:
            image = tools.read_and_fit(filepath_or_array=filepath,
                                       width=width,
                                       height=height,
                                       cval=cval)
        text = ''.join([c for c in text if c in alphabet])
        if not text:
            continue
        if augmenter:
            image = augmenter.augment_image(image)
        yield (image, text)
Example #27
def remake_file(buoys=None, tables=None, remaketype='hdf', remakefrom='txt'):
    '''Remake file from another file if messed up.

    Overwrites existing remaketype files.

    buoys (list): buoys to remake
    tables (list): tables to remake (just for TABS buoys). If buoys is None,
     tables will be read in for each buoy to cover all options.
    remaketype (str), default 'hdf': which type of file to remake
    remakefrom (str), default 'txt': which type of existing file to use to
     remake other file from.
    Options for both are 'hdf' and 'txt'.
    '''

    if buoys is None:
        buoys = bys.index

    # loop through buoys
    for buoy in buoys:

        # pulls out the non-nan table values to loop over valid table names
        if len(buoy) == 1 and tables is None:
            tables = [
                bys.loc[buoy, table] for table in tablekeys
                if not pd.isnull(bys.loc[buoy, table])
            ]
        elif tables is None:
            tables = ['unused']

        for table in tables:  # loop through tables for each buoy
            if len(buoy) == 1:
                assert table is not None, 'need to input table when using TABS buoy'
                fname = path.join('..', 'daily',
                                  'tabs_' + buoy + '_' + table + '_all')
            else:
                fname = path.join('..', 'daily', buoy + '_all')

            # read from remakefrom file, write to remaketype file
            df = tools.read(fname, remakefrom)
            tools.write_file(df,
                             fname,
                             filetype=remaketype,
                             mode='w',
                             append=False)
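A hedged call example based on the docstring; the buoy and table names are taken from the tests above:

# rebuild the hdf file for buoy V's ven table from the existing txt copy
remake_file(buoys=['V'], tables=['ven'], remaketype='hdf', remakefrom='txt')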
Example #28
def getRoomObjList():
    global room_obj_list
    global files
    global isinit
    if isinit:
        return room_obj_list
    logging.info('init room obj list')
    md = read(conf.videolist_path)
    lines = md.split('\n')
    for l in lines:
        match = re.match(r'\[(.*)\]\((.*)\)', l)
        if l and match:
            room_obj = {
                'file_name': match.group(1) + '.mp4',
                'url': match.group(2)
            }
            if room_obj.get('file_name', '') in files:
                logging.info(room_obj.get('file_name', '') + ' already exists')
            else:
                room_obj_list.append(room_obj)
    isinit = True
    return room_obj_list
Example #29
    def __init__(self, source) -> None:
        self.source = tools.read(source)
        if isinstance(self.source, str):
            self.source = self.source.splitlines()

        parts = []  #List[str]
        self.individuals = []  # List['ParseDisplayOutput.Individual']

        in_header = True
        for line in self.source:
            line = line.strip()

            ## skip header, until a line starts with Version
            if in_header:
                if line.startswith('Version'):
                    in_header = False
                else:
                    continue

            if line.startswith('Version'):
                mpos = line.index(', Market:')
                self.version = line[8:mpos].strip()
                self.market = line[mpos + 9:].strip()
                continue

            if line.startswith('First'):
                last = line.find('Last:') - 1
                while last > 0 and line[last] == ' ':
                    last = last - 1
                if last > 0 and line[last] != ',':
                    line = line[:last + 1] + ',' + line[last + 1:]

            if len(line) > 0:
                parts.extend(map(str.strip, line.split(',')))
            else:
                individual = self.parse_individual(parts)
                if individual is not None:
                    self.individuals.append(individual)
                parts = []
Example #30
def load_data(*filenames):
    features = None
    labels = None

    for name in filenames:
        data = read(name)

        file_features = np.array([
            np.append(np.unpackbits(state), i % 2) for game in data
            for i, state in enumerate(game[0])
        ])

        file_labels = np.array([game[1] for game in data for _ in game[0]])

        if features is None:
            features = file_features
            labels = file_labels
        else:
            features = np.append(features, file_features, axis=0)
            labels = np.append(labels, file_labels, axis=0)

    return features, labels
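A usage sketch; the filenames are placeholders for whatever saved game records read() expects:

features, labels = load_data('games_batch1.dat', 'games_batch2.dat')
print(features.shape, labels.shape)  # one row per board state, one label per state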
Example #31
    def detect(self,
               images: typing.List[typing.Union[np.ndarray, str]],
               detection_threshold=0.7,
               text_threshold=0.4,
               link_threshold=0.4,
               size_threshold=10,
               **kwargs):
        """Recognize the text in a set of images.

        Args:
            images: Can be a list of numpy arrays of shape HxWx3 or a list of
                filepaths.
        """
        images = [compute_input(tools.read(image)) for image in images]
        boxes = []
        for image in images:
            boxes.append(
                getBoxes(self.model.predict(image[np.newaxis], **kwargs),
                         detection_threshold=detection_threshold,
                         text_threshold=text_threshold,
                         link_threshold=link_threshold,
                         size_threshold=size_threshold)[0])
        return boxes
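A usage sketch, assuming detect is a method of a detector object holding the trained model; the image path is a placeholder:

boxes = detector.detect(['sample.jpg'], detection_threshold=0.7)
# boxes[0] is the array of word boxes found in the first image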
Example #32
def remake_file(buoys=None, tables=None, remaketype='hdf', remakefrom='txt'):
    '''Remake file from another file if messed up.

    Overwrites existing remaketype files.

    buoys (list): buoys to remake
    tables (list): tables to remake (just for TABS buoys). If buoys is None,
     tables will be read in for each buoy to cover all options.
    remaketype (str), default 'hdf': which type of file to remake
    remakefrom (str), default 'txt': which type of existing file to use to
     remake other file from.
    Options for both are 'hdf' and 'txt'.
    '''

    if buoys is None:
        buoys = bys.keys()

    # loop through buoys
    for buoy in buoys:

        # pulls out the non-nan table values to loop over valid table names
        if len(buoy) == 1 and tables is None:
            tables = [bys[buoy][table] for table in tablekeys if not pd.isnull(bys[buoy][table])]
        elif tables is None:
            tables = ['unused']

        for table in tables:  # loop through tables for each buoy
            if len(buoy) == 1:
                assert table is not None, 'need to input table when using TABS buoy'
                fname = path.join('..', 'daily', 'tabs_' + buoy + '_' + table + '_all')
            else:
                fname = path.join('..', 'daily', buoy + '_all')

            # read from remakefrom file, write to remaketype file
            df = tools.read(fname, remakefrom)
            tools.write_file(df, fname, filetype=remaketype, mode='w', append=False)
Example #33
def readwrite(buoy, table=None, dstart=pd.Timestamp('1980-1-1', tz='utc')):
    '''Creates or updates buoy data files.

    Reads through yesterday so that when appended to everything is consistent.
    This will take a long time to run if none of the files exist.
    Note that dstart is ignored if buoy data file already exists.
    '''

    # bring data in file up through yesterday. This way files are
    # consistent regardless of what time of day script is run.
    dend = pd.Timestamp('now', tz='UTC').normalize()
    # file write flag
    mode = 'w'
    append = False  # for hdf file

    if len(buoy) == 1:
        assert table is not None, 'need to input table when using TABS buoy'
        fname = path.join('..', 'daily', 'tabs_' + buoy + '_' + table + '_all')
    else:
        fname = path.join('..', 'daily', buoy + '_all')

    # if buoy is inactive and its "all" file exists, don't read
    if buoy in bys.keys() and not bys[buoy]['active'] and path.exists(fname):
        return

    # two types of files
    Types = ['txt', 'hdf']

    # if any of the files exist, then we want to make sure they are consistent
    if np.asarray([path.exists(fname + '.' + Type) for Type in Types]).any():
        lastrows = []
        for Type in Types:
            # get last row in file
            try:
                lastrows.append(tools.read(fname, Type, lastlineonly=True))
            # if can't get last row, remake file
            except:
                logging.warning('Could not access existing file %s of type %s. Will remake.' % (fname, Type))
                # try other type of files to remake this file if needed
                othertype = [temp for temp in Types if temp != Type]
                try:
                    remake_file(buoys=[buoy], tables=[table], remaketype=Type, remakefrom=othertype[0])
                    logging.warning('Remade file of type %s from type %s for buoy %s' % (Type, othertype[0], buoy))
                except:
                    logging.warning('Could not remake file for buoy %s' % (buoy))
                # now the file should exist, so can read in lastrow
                lastrows.append(tools.read(fname, Type, lastlineonly=True))


        # if last rows are not the same, remake shorter file
        if not lastrows[0] == lastrows[1]:
            lastrow = lastrows[0]
            lastrow2 = lastrows[1]
            Type = Types[0]
            Type2 = Types[1]
            if lastrow < lastrow2:
                remake_file(buoys=[buoy], remaketype=Type, remakefrom=Type2)
                logging.warning('File type %s for buoy %s was short and remade with file type %s.' % (Type, buoy, Type2))
            elif lastrow2 < lastrow:
                remake_file(buoys=[buoy], remaketype=Type2, remakefrom=Type)
                logging.warning('File type %s for buoy %s was short and remade with file type %s.' % (Type2, buoy, Type))

    # now files should be consistent at this point if they already exist
    # if file already exists, overwrite dstart with day after day from last line of file
    if path.exists(fname + '.hdf'):
        dstart = tools.read(fname, Type, lastlineonly=True).normalize().tz_localize('UTC') + pd.Timedelta('1 days')
        mode = 'a'  # overwrite write mode
        append = True  # overwrite append mode for hdf
    df = read.read(buoy, dstart, dend, table=table, units='M', tz='UTC',
                   usemodel=False, userecent=False)

    # can't append to file with empty dataframe
    if df is not None and not (mode == 'a' and df.empty):
        for Type in Types:
            try:
                tools.write_file(df, fname, filetype=Type, mode=mode, append=append)
            except:
                logging.warning('Could not write to file %s of type %s. Will remake.' % (fname, Type))
                # try both other types of files to remake this file if needed
                othertype = [temp for temp in Types if temp != Type]
                try:
                    remake_file(buoys=[buoy], tables=[table], remaketype=Type, remakefrom=othertype[0])
                    logging.warning('Remade file of type %s from type %s for buoy %s' % (Type, othertype[0], buoy))
                except:
                    logging.warning('Could not remake file for buoy %s' % (buoy))
    else:
        logging.warning('No new data has been read in for buoy ' + buoy + ' table ' + table)
Example #34
os.chdir(lookup_dir)

# print welcome message
print('markdown to html conversion for all txt files in this directory')
print('and all subdirectories - for usage see comments in the python source')
print('start: ' + str(datetime.time(datetime.now())))

# clean and recreate install directory
if os.path.isdir(install_dir):
    shutil.rmtree(install_dir)
os.mkdir(install_dir)

# load header
header = template_dir + os.sep + header_file
if os.path.exists(header) and os.path.isfile(header):
    header = tools.read(header)
else:
    header = u''

# copy stylesheet if existing
css = template_dir + os.sep + style_file
if os.path.exists(css) and os.path.isfile(css):
    text = tools.read(css)
    tools.write(install_dir + os.sep + style_file, text, 'ascii')
    # must save as ascii because Java CSS import in JEditorPane does not read utf-8

# locate all lookup files
print "locating markdown files"
folders, files = tools.locate(lookup_files)
number = len(files)
print "found %d files" %number