Ejemplo n.º 1
0
def download_image(url, dest):
    """
    Download a single image into the provided destination directory.

    The file is stored under its basename (the last path segment of the
    URL). Responses that are not images are silently skipped.

    :param url: direct URL of the image to fetch
    :param dest: destination directory (assumed to already exist)
    """
    # Log before `dest` is rebound so the message shows the directory.
    logging.info('Downloading {} into {}'.format(url, dest))
    dest = um.join_paths(dest, url.split('/')[-1])
    # A timeout prevents one stalled server from hanging this worker forever.
    response = requests.get(url, timeout=30)
    if um.is_image_response(response):
        with open(dest, 'wb') as f:
            f.write(response.content)
Ejemplo n.º 2
0
def download_image(url, dest):
    """
    Fetch one image from *url* and store it inside the *dest* folder.

    The local file name is the last path segment of the URL; non-image
    responses are ignored.

    :param url: image URL
    :param dest: target directory
    """
    logging.info('Downloading {} into {}'.format(url, dest))
    filename = url.split('/')[-1]
    target_path = um.join_paths(dest, filename)
    response = requests.get(url)
    if not um.is_image_response(response):
        return
    with open(target_path, 'wb') as out_file:
        out_file.write(response.content)
Ejemplo n.º 3
0
def download_image(url, dest):
    """
    Downloads a single image into provided destination.
    :param url: direct URL of the image; its last path segment becomes
        the local file name
    :param dest: destination directory (assumed to exist)
    """
    # Log before `dest` is rebound below, so the message shows the directory.
    logging.info('Downloading {} into {}'.format(url, dest))
    dest = um.join_paths(dest, url.split('/')[-1])
    response = requests.get(url)
    # Persist the body only when the server actually returned an image;
    # any other response is silently dropped (best-effort download).
    if um.is_image_response(response):
        with open(dest, 'wb') as f:
            f.write(response.content)


if __name__ == '__main__':
    # CLI: where to read the link list from and where to store images.
    parser = um.get_cli_parser()
    parser.add_option("-f",
                      "--file",
                      dest="filename",
                      default=um.join_paths(um.get_script_path(), 'links'),
                      help="data file with urls")
    parser.add_option("-d",
                      "--dest",
                      dest="dest",
                      default=um.join_paths(um.get_script_path(), 'images'),
                      help="destination folder")
    opts = um.parse_cli_options(parser)
    um.set_logging()

    # create destination path if it doesn't exist
    um.make_path(opts.dest)

    # Fan out one download task per link across all CPU cores.
    with cnc.ProcessPoolExecutor(max_workers=mp.cpu_count()) as executor:
        with open(opts.filename) as links_file:
            for raw_line in links_file:
                executor.submit(download_image,
                                raw_line.replace('\n', ''),
                                opts.dest)
Ejemplo n.º 4
0
from sklearn.grid_search import GridSearchCV

__author__ = "Denys Sobchyshak"
__email__ = "*****@*****.**"

# setting up constants
outlier_share = 0.1
validation_share = 0.1

# operational settings: logging plus a single CLI option for the data file
ut.set_logging()
parser = ut.get_cli_parser()
parser.add_option("-f",
                  "--file",
                  dest="filename",
                  default=ut.join_paths(ut.get_script_path(), 'data',
                                        'day.csv'),
                  help="data file with urls")
opts = ut.parse_cli_options(parser)
data_path = opts.filename
# hourly vs. daily dataset is inferred from the file name
is_hour_data = 'hour' in data_path

# loading data, parsing column 1 (dteday) as the datetime index
# FIX: `pd.datetime` was removed in pandas 1.0 — parse via pd.to_datetime.
data = pd.read_csv(data_path,
                   parse_dates=[1],
                   index_col=1,
                   date_parser=lambda x: pd.to_datetime(x, format="%Y-%m-%d"))

logging.info('Cleaning and normalizing the feature space')
# we have no need in an index feature
features = data.drop(['instant'], axis=1)
fields_to_encode = ['season', 'yr', 'mnth', 'weekday', 'weathersit']
Ejemplo n.º 5
0
    :param dest:
    """
    logging.info('Downloading {} into {}'.format(url, dest))
    dest = um.join_paths(dest, url.split('/')[-1])
    response = requests.get(url)
    if um.is_image_response(response):
        with open(dest, 'wb') as f:
            f.write(response.content)


if __name__ == '__main__':
    parser = um.get_cli_parser()
    parser.add_option("-f",
                      "--file",
                      dest="filename",
                      default=um.join_paths(um.get_script_path(), 'links'),
                      help="data file with urls")
    parser.add_option("-d",
                      "--dest",
                      dest="dest",
                      default=um.join_paths(um.get_script_path(), 'images'),
                      help="destination folder")
    opts = um.parse_cli_options(parser)
    um.set_logging()

    # create destination path if it doesn't exist
    um.make_path(opts.dest)

    with cnc.ProcessPoolExecutor(max_workers=mp.cpu_count()) as executor:
        with open(opts.filename) as f:
            for line in f:
Ejemplo n.º 6
0
from sklearn import linear_model
from sklearn.grid_search import GridSearchCV


__author__ = "Denys Sobchyshak"
__email__ = "*****@*****.**"


# setting up constants
outlier_share = 0.1
validation_share = 0.1

# operational settings: logging plus a single CLI option for the data file
ut.set_logging()
parser = ut.get_cli_parser()
parser.add_option("-f", "--file", dest="filename",
                  default=ut.join_paths(ut.get_script_path(), 'data', 'day.csv'),
                  help="data file with urls")
opts = ut.parse_cli_options(parser)
data_path = opts.filename
# hourly vs. daily dataset is inferred from the file name
is_hour_data = 'hour' in data_path

# loading data, parsing column 1 (dteday) as the datetime index
# FIX: `pd.datetime` was removed in pandas 1.0 — parse via pd.to_datetime.
data = pd.read_csv(data_path, parse_dates=[1], index_col=1,
                   date_parser=lambda x: pd.to_datetime(x, format="%Y-%m-%d"))

logging.info('Cleaning and normalizing the feature space')
# we have no need in an index feature
features = data.drop(['instant'], axis=1)
fields_to_encode = ['season', 'yr', 'mnth', 'weekday', 'weathersit']
if is_hour_data:
    # hourly data carries an extra `hr` categorical column
    fields_to_encode.append('hr')
mt.hot_encode(features, fields_to_encode)