def __iter__(self):
    filepath = download(self._url, self._cache_dir)
    with tarfile.open(filepath) as archive:
        for filename in archive.getnames():
            if filename.startswith('aclImdb/train/pos/'):
                yield self._read(archive, filename), True
            elif filename.startswith('aclImdb/train/neg/'):
                yield self._read(archive, filename), False
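The generator above relies on a _read helper that is not shown; a minimal sketch, assuming it simply extracts the tar member and decodes it as text, could look like this:

def _read(self, archive, filename):
    # Hypothetical helper: extract a member from the open tar archive
    # and decode it as UTF-8 text.
    with archive.extractfile(filename) as file_:
        return file_.read().decode('utf-8')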
Example #2
def test_reference_file_readonly(filename, md5, ref_sum, ref_num_cells):
    filename = helpers.download(filename, md5)

    mesh = meshio.read(filename)
    tol = 1.0e-2
    s = mesh.points.sum()
    assert abs(s - ref_sum) < tol * ref_sum
    assert {k: len(v) for k, v in mesh.cells.items()} == ref_num_cells
    assert {
        k: len(v["gmsh:physical"]) for k, v in mesh.cell_data.items()
    } == ref_num_cells
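The meshio test snippets here assume a helpers.download(filename, md5) utility that fetches a reference file into a local cache and verifies its checksum; a rough sketch of such a helper (the base URL and cache directory below are assumptions, not the project's real values) could be:

import hashlib
import os
import urllib.request


def download(filename, md5, base_url='https://example.org/meshio-data',
             cache_dir='/tmp/meshio-cache'):
    # Hypothetical cache-and-verify helper.
    path = os.path.join(cache_dir, filename)
    if not os.path.exists(path):
        os.makedirs(os.path.dirname(path), exist_ok=True)
        urllib.request.urlretrieve('{}/{}'.format(base_url, filename), path)
    with open(path, 'rb') as f:
        checksum = hashlib.md5(f.read()).hexdigest()
    if checksum != md5:
        raise RuntimeError('Checksum mismatch for {}'.format(filename))
    return path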
Example #3
def test_reference_file(filename, md5, ref_sum, ref_num_cells, write_binary):
    filename = helpers.download(filename, md5)

    mesh = meshio.read(filename)
    tol = 1.0e-2
    s = numpy.sum(mesh.points)
    assert abs(s - ref_sum) < tol * ref_sum
    assert len(mesh.cells["triangle"]) == ref_num_cells
    writer = partial(meshio.vtk_io.write, write_binary=write_binary)
    helpers.write_read(writer, meshio.vtk_io.read, mesh, 1.0e-15)
    return
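These tests also rely on a helpers.write_read round trip that is not shown; a simplified sketch, assuming it only writes the mesh with the given writer, reads it back, and compares the point coordinates within the given tolerance, might be:

import os
import tempfile

import numpy


def write_read(writer, reader, mesh, atol):
    # Hypothetical round-trip check; the real helper presumably also compares
    # cells and attached data, not just the points.
    with tempfile.TemporaryDirectory() as tmp_dir:
        filepath = os.path.join(tmp_dir, 'test_mesh')
        writer(filepath, mesh)
        mesh2 = reader(filepath)
    assert numpy.allclose(mesh.points, mesh2.points, atol=atol, rtol=0.0)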
Example #4
def test_reference_file(filename, md5, ref_sum, ref_num_cells, write_binary):
    filename = helpers.download(filename, md5)

    mesh = meshio.read(filename)
    tol = 1.0e-2
    s = mesh.points.sum()
    assert abs(s - ref_sum) < tol * ref_sum
    assert {k: len(v) for k, v in mesh.cells.items()} == ref_num_cells
    assert {
        k: len(v["gmsh:physical"]) for k, v in mesh.cell_data.items()
    } == ref_num_cells

    writer = partial(meshio.msh_io.write, fmt_version="2", write_binary=write_binary)
    helpers.write_read(writer, meshio.msh_io.read, mesh, 1.0e-15)
Example #5
def _read_pages(self, url):
    """
    Extract plain words from a Wikipedia dump and store them to the pages
    file. Each page will be a line with words separated by spaces.
    """
    wikipedia_path = download(url, self._cache_dir)
    with bz2.open(wikipedia_path) as wikipedia, \
            bz2.open(self._pages_path, 'wt') as pages:
        for _, element in etree.iterparse(wikipedia, tag='{*}page'):
            if element.find('./{*}redirect') is not None:
                continue
            page = element.findtext('./{*}revision/{*}text')
            words = self._tokenize(page)
            pages.write(' '.join(words) + '\n')
            element.clear()
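self._tokenize is defined elsewhere in the same class; a minimal sketch, assuming a plain regex word split with lowercasing, could be:

import re


def _tokenize(self, page):
    # Hypothetical tokenizer: keep alphabetic words only and lowercase them.
    if page is None:
        return []
    return [word.lower() for word in re.findall(r'[A-Za-z]+', page)]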
Example #6
def test_reference_file_with_mixed_cells():
    filename = "med/cylinder.med"
    md5 = "e36b365542c72ef470b83fc21f4dad58"
    filename = helpers.download(filename, md5)
    mesh = meshio.read(filename)

    # Points
    assert numpy.isclose(mesh.points.sum(), 16.53169892762988)

    # Cells
    ref_num_cells = {"pyramid": 18, "quad": 18, "line": 17, "tetra": 63, "triangle": 4}
    assert {k: len(v) for k, v in mesh.cells.items()} == ref_num_cells

    # Point tags
    assert mesh.point_data["point_tags"].sum() == 52
    ref_point_tags_info = {2: ["Side"], 3: ["Side", "Top"], 4: ["Top"]}
    assert mesh.point_tags == ref_point_tags_info

    # Cell tags
    ref_sum_cell_tags = {
        "pyramid": -116,
        "quad": -75,
        "line": -48,
        "tetra": -24,
        "triangle": -30,
    }
    assert {
        k: v["cell_tags"].sum() for k, v in mesh.cell_data.items()
    } == ref_sum_cell_tags
    ref_cell_tags_info = {
        -6: ["Top circle"],
        -7: ["Top", "Top and down"],
        -8: ["Top and down"],
        -9: ["A", "B"],
        -10: ["B"],
        -11: ["B", "C"],
        -12: ["C"],
    }
    assert mesh.cell_tags == ref_cell_tags_info

    helpers.write_read(meshio.med_io.write, meshio.med_io.read, mesh, 1.0e-15)
Example #8
def test_reference_file_with_point_cell_data():
    filename = "med/box.med"
    md5 = "0867fb11bd14b83ad11ab20e2b1fd57d"
    filename = helpers.download(filename, md5)
    mesh = meshio.read(filename)

    # Points
    assert numpy.isclose(mesh.points.sum(), 12)

    # Cells
    assert {k: len(v) for k, v in mesh.cells.items()} == {"hexahedron": 1}

    # Point data
    data_u = mesh.point_data["resu____DEPL"]
    assert data_u.shape == (8, 3)
    assert numpy.isclose(data_u.sum(), 12)

    # Cell data
    # ELNO (1 data point for every node of each element)
    data_eps = mesh.cell_data["hexahedron"]["resu____EPSI_ELNO"]
    assert data_eps.shape == (1, 8, 6)  # (n_cells, n_nodes_per_element, n_components)
    data_eps_mean = numpy.mean(data_eps, axis=1)[0]
    eps_ref = numpy.array([1, 0, 0, 0.5, 0.5, 0])
    assert numpy.allclose(data_eps_mean, eps_ref)

    data_sig = mesh.cell_data["hexahedron"]["resu____SIEF_ELNO"]
    assert data_sig.shape == (1, 8, 6)  # (n_cells, n_nodes_per_element, n_components)
    data_sig_mean = numpy.mean(data_sig, axis=1)[0]
    sig_ref = numpy.array(
        [7328.44611253, 2645.87030114, 2034.06063679, 1202.6, 569.752, 0]
    )
    assert numpy.allclose(data_sig_mean, sig_ref)

    data_psi = mesh.cell_data["hexahedron"]["resu____ENEL_ELNO"]
    assert data_psi.shape == (1, 8, 1)  # (n_cells, n_nodes_per_element, n_components)

    # ELEM (1 data point for each element)
    data_psi_elem = mesh.cell_data["hexahedron"]["resu____ENEL_ELEM"]
    assert numpy.isclose(numpy.mean(data_psi, axis=1)[0, 0], data_psi_elem[0])

    helpers.write_read(meshio.med_io.write, meshio.med_io.read, mesh, 1.0e-15)
Example #9
def test_reference_file_with_point_cell_data():
    filename = "med/box.med"
    md5 = "0867fb11bd14b83ad11ab20e2b1fd57d"
    filename = helpers.download(filename, md5)
    mesh = meshio.read(filename)

    # Points
    assert numpy.isclose(mesh.points.sum(), 12)

    # Cells
    assert {k: len(v) for k, v in mesh.cells.items()} == {"hexahedron": 1}

    # Point data
    data_u = mesh.point_data["resu____DEPL"]
    assert data_u.shape == (8, 3)
    assert numpy.isclose(data_u.sum(), 12)

    # Cell data
    # ELNO (1 data point for every node of each element)
    data_eps = mesh.cell_data["hexahedron"]["resu____EPSI_ELNO"]
    assert data_eps.shape == (1, 8, 6)  # (n_cells, n_nodes_per_element, n_components)
    data_eps_mean = numpy.mean(data_eps, axis=1)[0]
    eps_ref = numpy.array([1, 0, 0, 0.5, 0.5, 0])
    assert numpy.allclose(data_eps_mean, eps_ref)

    data_sig = mesh.cell_data["hexahedron"]["resu____SIEF_ELNO"]
    assert data_sig.shape == (1, 8, 6)  # (n_cells, n_nodes_per_element, n_components)
    data_sig_mean = numpy.mean(data_sig, axis=1)[0]
    sig_ref = numpy.array(
        [7328.44611253, 2645.87030114, 2034.06063679, 1202.6, 569.752, 0]
    )
    assert numpy.allclose(data_sig_mean, sig_ref)

    data_psi = mesh.cell_data["hexahedron"]["resu____ENEL_ELNO"]
    assert data_psi.shape == (1, 8)  # (n_cells, n_nodes_per_element); the length-1 component axis is dropped

    # ELEM (1 data point for each element)
    data_psi_elem = mesh.cell_data["hexahedron"]["resu____ENEL_ELEM"]
    assert numpy.isclose(numpy.mean(data_psi, axis=1)[0], data_psi_elem[0])

    helpers.write_read(meshio.med_io.write, meshio.med_io.read, mesh, 1.0e-15)
Example #10
def getData():
    r = download('https://adventofcode.com/2018/day/3/input')
    data = []
    regex = re.compile(r'^#(\d+) @ (\d+),(\d+): (\d+)x(\d+)$')
    # The wording of the problem says the size is "at least 1000x1000"
    # My input is exactly 1000x1000, but let's play it safe and work it out anyway
    max_x = 0
    max_y = 0
    for line in [x.decode() for x in r.iter_lines()]:
        linematch = regex.match(line)
        _, left, top, width, height = map(int, linematch.groups())
        # Process each line into a list of (x,y) tuples that its rectangle covers
        coords = []
        for x in range(left, left + width):
            for y in range(top, top + height):
                coords.append((x, y))
        max_x = max(max_x, left + width)
        max_y = max(max_y, top + height)
        data.append(coords)
    print('Grid size is {}x{}'.format(max_x, max_y))
    return data, (max_x, max_y)
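The getData functions in these Advent of Code examples share a download(url) helper that returns a requests response; puzzle inputs are tied to a logged-in account, so a plausible sketch needs the session cookie (the AOC_SESSION environment variable below is an assumption):

import os

import requests


def download(url):
    # Hypothetical fetch helper: reads the adventofcode.com session cookie
    # from the environment and returns the raw response for the caller to parse.
    response = requests.get(url, cookies={'session': os.environ['AOC_SESSION']})
    response.raise_for_status()
    return response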
Example #11
def getData():
    r = download('https://adventofcode.com/2018/day/12/input')
    iterlines = r.iter_lines()

    # Prepend and append some padding to allow the 5-wide window tests
    initialState = padding + [x == b'#'[0]
                              for x in next(iterlines)[15:]] + padding
    # Skip the empty line in the input
    _ = next(iterlines)

    rules = []
    for line in iterlines:
        # Only store the rules which grow plants
        if line[9] == b'#'[0]:
            rule = 0
            for x in line[0:5]:
                rule = rule << 1
                rule += x == b'#'[0]
            rules.append(rule)

    return np.array(initialState, dtype=bool), np.array(rules, dtype=np.uint8)
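The inner loop above packs each 5-character rule pattern into a 5-bit integer, treating # as 1 and . as 0; for example, the pattern #..#. packs to 0b10010, i.e. 18:

# Worked example of the rule packing used above.
rule = 0
for x in b'#..#.':
    rule = rule << 1
    rule += x == b'#'[0]
assert rule == 0b10010 == 18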
Example #12
def getData():
    r = download('https://adventofcode.com/2018/day/7/input')
    regex = re.compile(
        r'^Step ([A-Z]) must be finished before step ([A-Z]) can begin\.$')
    data = {}
    available = []
    for line in r.iter_lines():
        blocker, blockee = regex.match(line.decode()).groups()
        if blocker not in data:
            data[blocker] = task(blocker)
            available.append(data[blocker])
        if blockee not in data:
            data[blockee] = task(blockee)

        data[blocker].blocks.add(data[blockee])
        data[blockee].blockedby.add(data[blocker])

        if data[blockee] in available:
            available.remove(data[blockee])

    # Sort the available tasks, by their name, backwards so that .pop() gets the next one
    available.sort(reverse=True)

    return data, available
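The task class used above is not part of the snippet; a minimal sketch that supports what the code needs (a name, blocks/blockedby sets, and ordering by name for the reverse sort) might be:

class task:
    # Hypothetical dependency-graph node matching the usage in getData().
    def __init__(self, name):
        self.name = name
        self.blocks = set()      # tasks that cannot start until this one is done
        self.blockedby = set()   # tasks that must finish before this one

    def __lt__(self, other):
        # available.sort(reverse=True) orders tasks alphabetically by name.
        return self.name < other.name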
Example #13
def getData():
    r = download('https://adventofcode.com/2018/day/13/input')
    track = []
    carts = []

    for y, line in enumerate(r.iter_lines()):
        track.append([])
        for x, char in enumerate(line.decode()):
            if char in ['^', 'v']:
                track[-1].append('|')
                carts.append((x, y, 0, 1 if char == '^' else -1, 0))
            elif char in ['>', '<']:
                track[-1].append('-')
                carts.append((x, y, 1 if char == '>' else -1, 0, 0))
            else:
                track[-1].append(char)

    carts = np.array(carts,
                     dtype={
                         'names': ['x', 'y', 'dirx', 'diry', 'rot'],
                         'formats': [np.int16 for _ in range(5)]
                     })

    return np.array(track), carts
Example #14
def getData():
    r = download('https://adventofcode.com/2018/day/23/input')
    regex = re.compile(r'^pos=<(-?\d+),(-?\d+),(-?\d+)>, r=(\d+)$')
    data = np.array(
        [tuple(map(int, regex.match(x.decode()).groups())) for x in r.iter_lines()],
        dtype={'names': ['x', 'y', 'z', 'r'],
               'formats': [np.int32, np.int32, np.int32, np.uint32]})
    return data
Example #15
def getData():
    r = download('https://adventofcode.com/2018/day/9/input')
    regex = re.compile(r'^(\d+) players; last marble is worth (\d+) points$')
    return tuple(map(int, regex.match(r.text.strip()).groups()))
Example #16
def add_from(request, prefix_lower, prefix_normal, multi=False):
    url = request.form.get(f'{prefix_lower}-art-url')
    if url is None and request.get_json() is not None:
        url = request.get_json()[f'{prefix_lower}-art-url']
    if url != '' and url is not None:
        if prefix_lower == 'deviantart':
            art = scrapers.deviant_art(url)
        elif prefix_lower == 'artstation':
            if multi:
                art = scrapers.art_station(url, True)
            else:
                art = scrapers.art_station(url)
        elif prefix_lower == 'pixiv':
            art = scrapers.pixiv(url, load_pickle().get('pixiv_username'), load_pickle().get('pixiv_password'))
        elif prefix_lower == 'tumblr':
            art = scrapers.tumblr(url)
        elif prefix_lower == 'instagram':
            art = scrapers.instagram(url)
        elif prefix_lower == 'reddit':
            art = scrapers.reddit(url)
        elif prefix_lower == 'twitter':
            art = scrapers.twitter(url)

        title = art['title']
        if multi:
            images = []
            for image_url in art['image_url']:
                images.append(helpers.download(image_url, UPLOAD_FOLDER))
            images = ','.join(images)
            image = images
        else:
            if prefix_lower == 'pixiv':
                image = helpers.download(art['image_url'], UPLOAD_FOLDER, art['source'])
            else:
                image = helpers.download(art['image_url'], UPLOAD_FOLDER)
        source = art['source']
        artist_name = art['artist_name']
        artist_website = art['artist_website']

        g.db = connect_db()

        if request.form.get('existing-artist'):
            artist_id = request.form.get('artist-id')
        else:
            artist = g.db.execute('SELECT id FROM artist WHERE website=?', [artist_website]).fetchone()
            if artist is not None:
                artist_id = artist[0]
            else:
                cursor = g.db.execute('INSERT into artist(name, website) VALUES(?,?)', (artist_name, artist_website))
                artist_id = cursor.lastrowid

        cursor = g.db.execute('INSERT into art(title, image_url, artist_id, source) VALUES(?,?,?,?)', (title, image, artist_id, source))

        inserted_row_id = cursor.lastrowid

        g.db.commit()
        g.db.close()

        if helpers.request_wants_json():
            return jsonify(status='success', message='Art added', id=inserted_row_id)
        else:
            flash('Art added', 'success')
    else:
        if helpers.request_wants_json():
            return jsonify(status='error', message=f'{prefix_normal} Image url was empty')
        else:
            flash(f'{prefix_normal} Image url was empty', 'error')
            return redirect('/add')

    if not helpers.request_wants_json():
        return redirect('/art/' + str(inserted_row_id))
Example #17
def edit_art():
    id = request.form.get('id')

    title = request.form.get('title')

    images_from_files = request.files.getlist('image-from-file')
    images_from_urls = request.form.getlist('image-from-url')
    existing_images = request.form.getlist('existing-image')
    images = []
    for local_image in request.form.getlist('local-image'):
        if local_image == 'true':
            file = images_from_files[0]
            images_from_files.pop(0)
            if file.filename == '':
                flash('No file selected', 'error')
                return redirect('/')
            if file and helpers.allowed_file(file.filename, ALLOWED_EXTENSIONS):
                filename = secure_filename(file.filename)
                filename = helpers.prepend_date_time_to_string(filename)
                file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
                image = filename
                images.append(image)
            else:
                flash('File extension not allowed', 'error')
                return redirect('/')
        elif local_image == 'false':
            image = images_from_urls[0]
            images_from_urls.pop(0)
            if image != '':
                image = helpers.download(image, UPLOAD_FOLDER)
                images.append(image)
            else:
                flash('Image url was empty', 'error')
                return redirect('/')
        elif local_image == 'existing':
            image = existing_images[0]
            existing_images.pop(0)
            images.append(image)

    images_string = ','.join(images)

    g.db = connect_db()

    if request.form.get('existing-artist') == 'true':
        artist_id = request.form.get('artist-id')
    else:
        artist_name = request.form.get('artist-name')
        artist_website = request.form.get('artist-website')
        cursor = g.db.execute('INSERT into artist(name, website) VALUES(?,?)', (artist_name, artist_website))
        artist_id = cursor.lastrowid

    source = request.form.get('source')

    images_before_edit = g.db.execute('SELECT image_url FROM art WHERE id=?', [id]).fetchone()[0]
    images_before_edit = images_before_edit.split(',')
    images_that_are_no_longer_in_use = list(set(images_before_edit) - set(images))
    for image_that_is_no_longer_in_use in images_that_are_no_longer_in_use:
        try:
            os.remove(os.path.join(UPLOAD_FOLDER, image_that_is_no_longer_in_use))
        except Exception as e:
            # print(e)
            pass

    g.db.execute('UPDATE art SET title=?, image_url=?, artist_id=?, source=?, updated_at=CURRENT_TIMESTAMP WHERE id=?', (title, images_string, artist_id, source, id))
    
    tags = request.form.getlist('tags')
    if tags != []:
        g.db.execute('DELETE FROM art_tag WHERE art_id=?', [id])
        for tag in tags:
            g.db.execute('INSERT into art_tag(art_id, tag_id) VALUES(?, ?)', (id, tag))
    
    g.db.commit()
    g.db.close()

    flash('Art updated', 'success')
    return redirect('/art/' + id)
Example #18
def getData():
    r = download('https://adventofcode.com/2018/day/18/input')
    return np.array([list(x.decode()) for x in r.iter_lines()])
Example #19
def getData():
    r = download('https://adventofcode.com/2018/day/21/input')
    iterator = r.iter_lines()
    iprLine = next(iterator).decode().split(' ')
    instructions = [[int(y) if i > 0 else y for i, y in enumerate(x.decode().split(' '))] for x in iterator]
    return int(iprLine[1]), instructions
Example #20
def getData():
    return download('https://adventofcode.com/2018/day/15/input')
Example #21
def create_alexnet():

    helpers.mkdir('data')
    numpy_data_path = os.path.join('data', 'bvlc_alexnet.npy')
    download_url = 'https://www.dropbox.com/s/gl5wa3uzru555nd/bvlc_alexnet.npy?dl=1'
    print('Downloading pre-trained AlexNet weights.')
    helpers.download(download_url, numpy_data_path)
    print('Weights downloaded.')

    variable_data = np.load(numpy_data_path, encoding='bytes', allow_pickle=True).item()

    conv1_preW = variable_data["conv1"][0]
    conv1_preb = variable_data["conv1"][1]
    conv2_preW = variable_data["conv2"][0]
    conv2_preb = variable_data["conv2"][1]
    conv3_preW = variable_data["conv3"][0]
    conv3_preb = variable_data["conv3"][1]
    conv4_preW = variable_data["conv4"][0]
    conv4_preb = variable_data["conv4"][1]
    conv5_preW = variable_data["conv5"][0]
    conv5_preb = variable_data["conv5"][1]
    fc6_preW = variable_data["fc6"][0]
    fc6_preb = variable_data["fc6"][1]
    fc7_preW = variable_data["fc7"][0]
    fc7_preb = variable_data["fc7"][1]
    fc8_preW = variable_data["fc8"][0]
    fc8_preb = variable_data["fc8"][1]

    pixel_depth = 255.0
    resized_height = 227
    resized_width = 227
    num_channels = 3

    print('Creating AlexNet model.')

    graph = tf.Graph()

    with graph.as_default():
        x = tf.placeholder(tf.uint8, [None, None, None, num_channels],
                           name='input')

        to_float = tf.cast(x, tf.float32)
        resized = tf.image.resize_images(to_float,
                                         [resized_height, resized_width])

        # Convolution 1
        with tf.name_scope('conv1') as scope:
            kernel = tf.Variable(conv1_preW, name='weights')
            biases = tf.Variable(conv1_preb, name='biases')
            conv = tf.nn.conv2d(resized, kernel, [1, 4, 4, 1], padding="SAME")
            bias = tf.nn.bias_add(conv, biases)
            conv1 = tf.nn.relu(bias, name=scope)

        # Local response normalization 1
        radius = 2
        alpha = 2e-05
        beta = 0.75
        bias = 1.0
        lrn1 = tf.nn.local_response_normalization(conv1,
                                                  depth_radius=radius,
                                                  alpha=alpha,
                                                  beta=beta,
                                                  bias=bias)

        # Maxpool 1
        pool1 = tf.nn.max_pool(lrn1,
                               ksize=[1, 3, 3, 1],
                               strides=[1, 2, 2, 1],
                               padding='VALID',
                               name='pool1')

        # Convolution 2
        with tf.name_scope('conv2') as scope:

            kernel = tf.Variable(conv2_preW, name='weights')
            biases = tf.Variable(conv2_preb, name='biases')

            input_a, input_b = tf.split(pool1, 2, 3)
            kernel_a, kernel_b = tf.split(kernel, 2, 3)

            with tf.name_scope('A'):
                conv_a = tf.nn.conv2d(input_a,
                                      kernel_a, [1, 1, 1, 1],
                                      padding="SAME")

            with tf.name_scope('B'):
                conv_b = tf.nn.conv2d(input_b,
                                      kernel_b, [1, 1, 1, 1],
                                      padding="SAME")

            conv = tf.concat([conv_a, conv_b], 3)
            bias = tf.nn.bias_add(conv, biases)
            conv2 = tf.nn.relu(bias, name=scope)

        # Local response normalization 2
        radius = 2
        alpha = 2e-05
        beta = 0.75
        bias = 1.0
        lrn2 = tf.nn.local_response_normalization(conv2,
                                                  depth_radius=radius,
                                                  alpha=alpha,
                                                  beta=beta,
                                                  bias=bias)

        # Maxpool 2
        pool2 = tf.nn.max_pool(lrn2,
                               ksize=[1, 3, 3, 1],
                               strides=[1, 2, 2, 1],
                               padding='VALID',
                               name='pool2')

        with tf.name_scope('conv3') as scope:
            kernel = tf.Variable(conv3_preW, name='weights')
            biases = tf.Variable(conv3_preb, name='biases')
            conv = tf.nn.conv2d(pool2, kernel, [1, 1, 1, 1], padding="SAME")
            bias = tf.nn.bias_add(conv, biases)
            conv3 = tf.nn.relu(bias, name=scope)

        with tf.name_scope('conv4') as scope:

            kernel = tf.Variable(conv4_preW, name='weights')
            biases = tf.Variable(conv4_preb, name='biases')

            input_a, input_b = tf.split(conv3, 2, 3)
            kernel_a, kernel_b = tf.split(kernel, 2, 3)

            with tf.name_scope('A'):
                conv_a = tf.nn.conv2d(input_a,
                                      kernel_a, [1, 1, 1, 1],
                                      padding="SAME")

            with tf.name_scope('B'):
                conv_b = tf.nn.conv2d(input_b,
                                      kernel_b, [1, 1, 1, 1],
                                      padding="SAME")

            conv = tf.concat([conv_a, conv_b], 3)
            bias = tf.nn.bias_add(conv, biases)
            conv4 = tf.nn.relu(bias, name=scope)

        with tf.name_scope('conv5') as scope:

            kernel = tf.Variable(conv5_preW, name='weights')
            biases = tf.Variable(conv5_preb, name='biases')

            input_a, input_b = tf.split(conv4, 2, 3)
            kernel_a, kernel_b = tf.split(kernel, 2, 3)

            with tf.name_scope('A'):
                conv_a = tf.nn.conv2d(input_a,
                                      kernel_a, [1, 1, 1, 1],
                                      padding="SAME")

            with tf.name_scope('B'):
                conv_b = tf.nn.conv2d(input_b,
                                      kernel_b, [1, 1, 1, 1],
                                      padding="SAME")

            conv = tf.concat([conv_a, conv_b], 3)
            bias = tf.nn.bias_add(conv, biases)
            conv5 = tf.nn.relu(bias, name=scope)

        # Maxpool 5
        pool5 = tf.nn.max_pool(conv5,
                               ksize=[1, 3, 3, 1],
                               strides=[1, 2, 2, 1],
                               padding='VALID',
                               name='pool5')

        # Fully connected 6
        with tf.name_scope('fc6'):
            weights = tf.Variable(fc6_preW, name='fc6_weights')
            bias = tf.Variable(fc6_preb, name='fc6_bias')
            shape = tf.shape(pool5)
            size = shape[1] * shape[2] * shape[3]
            z = tf.matmul(tf.reshape(pool5, [-1, size]), weights) + bias
            fc6 = tf.nn.relu(z, name='relu')

        # Fully connected 7
        with tf.name_scope('fc7'):
            weights = tf.Variable(fc7_preW, name='weights')
            bias = tf.Variable(fc7_preb, name='bias')
            z = tf.matmul(fc6, weights) + bias
            fc7 = tf.nn.relu(z, name='relu')

        # Fully connected 8
        with tf.name_scope('fc8'):
            weights = tf.Variable(fc8_preW, name='weights')
            bias = tf.Variable(fc8_preb, name='bias')
            fc8 = tf.matmul(fc7, weights) + bias

        softmax = tf.nn.softmax(fc8)

        init = tf.global_variables_initializer()

    print('Model created.')

    sess = tf.Session(graph=graph)
    sess.run(init)

    print('Exporting TensorBoard graph to tbout/alexnet')
    writer = tf.summary.FileWriter('tbout/alexnet', graph=graph)
    writer.close()

    print('Exporting TensorFlow model to data/alexnet')
    with graph.as_default():
        saver = tf.train.Saver()
        save_path = saver.save(sess, 'data/alexnet')
Example #22
########################################################################################
### Subreddit loop and reply function
### Scan through subreddits and classify food images
########################################################################################

for submission in subreddit.hot(limit=5):

    if submission.id not in reply_log.Post_ID.values:  # Check that the bot hasn't already replied to this post
        if submission.stickied: # Ignore stickied posts e.g. rules
            continue

        # Download and save picture and thumbnail
        if submission_is_image:
            filename = submission.id + ".jpg"
            download(submission.url, filename, pic_dest)
            download(submission.thumbnail, filename, thumbnail_dest)

            # Call CNN to classify and estimate calories
            #classification, calories = food_CNN(pic_dest, filename)
            classification, calories = (None, None)

            # Reply to post and generate new log entry
            text = "Hi" # TODO: learn to format
            new_reply = send_reply(text, submission, classification, calories)

            # Save reply in dataframe and csv file
            save_reply(new_reply, reply_log, csv_file)

    else:
        print("Already replied to post")
Example #23
    def parse_detail(self, response):
        category = ''
        name = ''
        address = ''
        city = ''
        phone = ''
        fax = ''
        email = ''
        website = ''
        description = ''
        url = response.url or ''
        image_url = ''
        # check type
        lis = response.css('.comp-body li')
        trs = response.css('table.table.description tr')
        if len(lis) > 0:
            # type 1
            for li in lis:
                k = li.css('::text').get().strip().split(':')[0].strip()
                v = li.css('::text').get().strip().split(':')[-1].strip()
                if len(k) == 0:
                    continue
                if 'Company Name' in k:
                    name = v
                elif 'Address' in k:
                    address = v
                elif 'Telephone' in k:
                    phone = li.css('a::text').get()
                elif 'Fax' in k:
                    fax = v
                elif 'Email' in k:
                    email = li.css('a::text').get()
            # description
            description = []
            for p in response.css('.comp-row > p::text'):
                txt = p.get().strip()
                if len(txt) == 0 or 'Description' in txt:
                    continue
                description.append(txt)
            description = ' '.join(description)
            # website
            website = response.css('.comp-row > p > a::attr(href)').get() or ''
            if self.name in website:
                website = ''
            # category
            category = response.css('.title-comp .col-sm-10::text')[-1].get()
            # image_url
            image_url = response.css('.img-container img::attr(src)').get() or ''
        elif len(trs) > 0:
            # type 2
            for tr in trs:
                k = tr.css('td::text')[0].get()
                v = tr.css('td::text')[-1].get()
                if len(k) == 0:
                    continue
                if 'Nama Perusahaan' in k:
                    name = v
                elif 'Alamat' in k:
                    address = tr.css('td')[-1].css('p::text').get()
                elif 'Kategori' in k:
                    category = v
                elif 'Telepon' in k:
                    phone = tr.css('td')[-1].css('a::text').get()
                elif 'Fax' in k:
                    fax = tr.css('td')[-1].css('a::text').get()
                elif 'Email' in k:
                    email = tr.css('td')[-1].css('a::text').get()
            # description
            description = []
            for p in response.css('.container > p::text'):
                txt = p.get().strip()
                if len(txt) == 0:
                    continue
                description.append(txt)
            description = ' '.join(description)
            # website
            website = response.css('a.btn.btn-contactus.btn-go-to::attr(href)').get() or ''
            if self.name in website:
                website = ''
            # image_url
            image_url = response.css('img.center-img::attr(src)').get() or ''

        if email is None or len(email) == 0:
            self.logger.info('{} : EMPTY EMAIL'.format(url))
            email = ''
        if phone is None or len(phone) == 0:
            self.logger.info('{} : EMPTY PHONE'.format(url))
            phone = ''

        # if len(email) > 0 and len(phone) > 0:
        name = helpers.fix_title(name)
        slug = helpers.get_slug(name)
        if image_url is not None and len(image_url) > 0:
            image_url = image_url.strip()
            ext = image_url.split('.')[-1]
            image_name = slug
            target_dir = 'images/{}/{}.{}'.format(self.name, image_name, ext)
            self.logger.info('downloading image: {} => {}'.format(image_url, target_dir))
            r = helpers.download(image_url, target_dir)
            if not r:
                self.logger.info('Failed download {} => {}'.format(image_url, target_dir))
        yield {
            'category': category.strip(),
            'name': name.strip(),
            'slug': slug.strip(),
            'address': address.strip(),
            'city': city.strip(),
            'phone': phone.strip(),
            'email': email.strip(),
            'website': website.strip(),
            'description': description.strip(),
            'url': url.strip(),
        }
Example #24
def getData():
    r = download('https://adventofcode.com/2018/day/25/input')
    return np.array([[int(y) for y in x.decode().split(',')] for x in r.iter_lines()])
Example #25
def __init__(self, cache_dir):
    path = download(type(self).URL, cache_dir)
    lines = self._read(path)
    data, target = self._parse(lines)
    self.data, self.target = self._pad(data, target)
Example #26
def getData():
    r = download('https://adventofcode.com/2018/day/14/input')
    return int(r.text.strip())
Example #27
def getData():
    r = download('https://adventofcode.com/2018/day/1/input')
    return list(map((lambda x: int(x)), r.iter_lines()))
Example #28
def getData():
    r = download('https://adventofcode.com/2018/day/5/input')
    # Deliberately return the byte encoding
    return np.array([x[0] for x in r.iter_content() if x != b'\n'])
Example #29
def link_crawler(start_url,
                 link_regex,
                 robots_url=None,
                 user_agent='statista',
                 max_depth=-1,
                 delay=3,
                 proxies=None,
                 num_retries=2,
                 cache=None,
                 scraper_callback=None):

    #: Initialze a crawl queue with a seed url to start the crawl from
    crawl_queue = [start_url]

    #: keep track of seen urls
    seen = {}

    robots = {}

    throttle = Throttle(delay)

    #: start the crawl
    while crawl_queue:
        url = crawl_queue.pop()

        #: robots.txt
        robots_file_present = False
        if 'http' not in url:
            continue

        #: Get the domain
        domain = '{}://{}'.format(urlparse(url).scheme, urlparse(url).netloc)

        #: Get the robot parser for this domain from the robots dictionary
        robot_parser = robots.get(domain)

        #: set a default robots url and a parser for it if there isn't one
        if not robot_parser and domain not in robots:
            robots_url = '{}/robots.txt'.format(domain)
            robot_parser = get_robots_parser(robots_url)
            if not robot_parser:
                #: continue to crawl even if there are problems finding robots.txt
                #: file
                robots_file_present = True
            # associate each domain with a corresponding parser, whether
            # present or not
            robots[domain] = robot_parser

        elif domain in robots:
            robots_file_present = True

        #: crawl only when url passes robots.txt restrictions
        if robots_file_present or robot_parser.can_fetch(user_agent, url):
            depth = seen.get(url, 0)
            if depth == max_depth:
                #: Skip link if you have crawled it more than max depth
                print('Skipping %s due to depth' % url)
                continue
            throttle.wait(url)
            html = download(url, num_retries=num_retries)
            if not html:
                continue
            if scraper_callback:
                scraper_callback(url, html)

            #: Get all links from page and filter only those matching given pattern
            for link in get_links(html):
                if re.search(link_regex, link):
                    if 'http' not in link:
                        # check if link is well formed and correct
                        if link.startswith('//'):
                            link = '{}:{}'.format(urlparse(url).scheme, link)
                        elif link.startswith('://'):
                            link = '{}{}'.format(urlparse(url).scheme, link)
                        else:
                            link = urljoin(domain, link)

                    if link not in seen:
                        seen[link] = depth + 1
                        crawl_queue.append(link)
        else:
            print('Blocked by robots.txt:', url)
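A usage sketch for link_crawler, with a placeholder start URL and link pattern (the companion helpers download, get_links, get_robots_parser, and Throttle are assumed to be defined alongside it):

# Hypothetical invocation: crawl a placeholder site two levels deep,
# following only links whose path contains '/articles/'.
def print_scraped(url, html):
    print('scraped', url, '({} bytes)'.format(len(html)))


link_crawler('http://example.com', r'/articles/',
             max_depth=2, delay=5, scraper_callback=print_scraped)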
Example #30
def getData():
    r = download('https://adventofcode.com/2018/day/2/input')
    return list(r.iter_lines())
Example #31
    def parse_detail(self, response):
        category = response.css('ol.breadcrumb.pull-left > li > a')[-1].css('::text').get() or ''
        name = response.css('h1.business-title span::text').get() or ''
        address = []
        city = response.css('span[itemprop=addressLocality]::text').get() or ''
        phone = response.css('span[itemprop=telephone]::text').get() or ''
        email = ''
        website = response.css('ul.dropdown-menu > li > a[itemprop=url]::attr(href)').get() or ''
        description = []
        url = response.url or ''

        # email
        try:
            cfemail = response.css('span.__cf_email__::attr(data-cfemail)').get() or ''
            if len(cfemail) > 0:
                email = helpers.cfDecodeEmail(cfemail)
        except:
            email = ''

        # address
        address_1 = response.css('h4 > span > span::text')
        address_2 = response.css('h4 > span::text')
        for index, a1 in enumerate(address_1):
            a1 = a1.get().strip()
            a2 = address_2[index].get().strip()
            address.append(a1)
            address.append(a2)
        address = ' '.join(address)
        address = address.replace(' ,', ',')

        # description
        for txt in response.css('.col-sm-12 > p p'):
            d = txt.css('::text').get() or ''
            description.append(d.strip())
        description = '. '.join(description)
        description = description.replace('..', '.')
        description = description.replace('. . ', '. ')
        description = description.replace('. . ', '. ')

        if len(email) == 0:
            self.logger.info('{} : EMPTY EMAIL'.format(url))
        if len(phone) == 0:
            self.logger.info('{} : EMPTY PHONE'.format(url))

        if len(email) > 0 and len(phone) > 0:
            image_url = response.css('.detail-listing-img > img::attr(src)').get()
            if image_url is not None and image_url[-1] != '/':
                image_url = image_url.strip()
                ext = image_url.split('.')[-1]
                image_name = helpers.get_slug(helpers.fix_title(name))
                target_dir = 'images/{}/{}'.format(self.name, image_name)
                self.logger.info('downloading image: {} => {}'.format(image_url, target_dir))
                helpers.download(image_url, target_dir)
            yield {
                'category': category.strip(),
                'name': name.strip(),
                'address': address.strip(),
                'city': city.strip(),
                'phone': phone.strip(),
                'email': email.strip(),
                'website': website.strip(),
                'description': description.strip(),
                'url': url.strip(),
            }
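helpers.cfDecodeEmail above undoes Cloudflare's e-mail obfuscation; a sketch of the usual decoding scheme (the first hex byte is an XOR key applied to each remaining byte) would be:

def cfDecodeEmail(encoded):
    # Decode a Cloudflare data-cfemail attribute: the first byte is an XOR key
    # applied to every following byte of the hex string.
    key = int(encoded[:2], 16)
    return ''.join(
        chr(int(encoded[i:i + 2], 16) ^ key) for i in range(2, len(encoded), 2)
    )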
Example #32
def getData():
    r = download('https://adventofcode.com/2018/day/4/input')
    # Make an array for the year 1518 (which is not a leap year)
    initialData = [None] * 12
    for i, v in enumerate([31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]):
        initialData[i] = [None] * v
    regex = re.compile(r'^\[(\d+)-(\d+)-(\d+) (\d+):(\d+)\] (?:(w)akes up|(f)alls asleep|Guard #(\d+) begins shift)$')
    guardSeen = []
    guardMap = []
    guardMaxDays = 0
    for line in [x.decode() for x in r.iter_lines()]:
        linematch = regex.match(line)
        # Two of the last 3 groups (w,f,g) will return None since they weren't matched
        _, m, d, H, M, w, f, g = [(x if not x or x in ['w', 'f'] else int(x)) for x in linematch.groups()]
        # Handle a special case where guards start just before midnight
        if H == 23:
            # Roll over a month if we're on the last day, else add a day
            if d > len(initialData[m]):
                m += 1
                d = 1
            else:
                d += 1

        # Days and months are 1-indexed, so decrement one now to get the index
        m -= 1
        d -= 1

        # Initialise this entry in the array if it hasn't been seen yet
        if not initialData[m][d]:
            initialData[m][d] = {'g': 0, 'w': [], 'f': []}

        if w or f:  # This line is a wake or fall asleep action
            action = w if w else f
            # Store the minutes at which this action happened on this day in reverse order -
            initialData[m][d][action] = sorted(initialData[m][d][action][:] + [M], reverse=True)
        else:  # Then this is a guard shift start
            # If we haven't seen this guard before, add them to the guardMap
            # Maintaining a map like this rather than indexing on the raw id compacts the dataset a lot
            # For the input I had, this reduces it from (3468, 19, 60) to only (22, 19, 60)
            if g not in guardMap:
                guardMapIndex = len(guardMap)
                guardMap.append(g)
            else:
                guardMapIndex = guardMap.index(g)

            # Set the guardMap index for this date
            initialData[m][d]['g'] = guardMapIndex

            # Keep track of the number of times we've seen each guard
            if guardMapIndex < len(guardSeen):
                guardSeen[guardMapIndex] += 1
            else:
                guardSeen.append(1)

            # Keep track of the most days we've seen any particular guard
            guardMaxDays = max(guardMaxDays, guardSeen[guardMapIndex])

    # Build the template for the numpy array, in shape (guard count, guardMaxDays, 60)
    # Add one to guardMaxId to avoid 0-index issues
    template = [None] * len(guardMap)
    for i in range(len(guardMap)):
        template[i] = [None] * guardMaxDays
        for j in range(guardMaxDays):
            template[i][j] = [False] * 60
    data = np.array(template)
    # Loop over the intial data again to convert it into something a little more useful
    for month in initialData:
        for day in month:
            # We don't have data for some days
            if day:
                g = day['g']
                # We use the seen counter to track where we insert this day's data
                # Decrement comes first because of 0-indexing
                guardSeen[g] -= 1
                while len(day['f']) > 0:
                    # Pop the earliest pair of falling/waking off the end of the lists
                    # This works because we sorted in reverse order before
                    f = day['f'].pop()
                    w = day['w'].pop()
                    # Set each minute in the interval to True for this (guard id, seen count) pair
                    data[g][guardSeen[g]][f:w] = [True for _ in range(f, w)]

    return data, guardMap