Example #1
def group(data, group_by, sample_width):
    """Groups the data rows read from csv files into groups of data."""
    import datetime
    import math

    # Start with a single group of five parallel column lists, one per
    # CSV column. Note that `time` below is the project's own time-helper
    # module, not the standard library.
    groups = [[[] for _ in range(5)]]

    cutoff = 'second'
    beginning_of_period = time.datetime_from_row(
        data[get_first_data_point_index(data)], cutoff)
    # An effectively infinite period means "no periodic wrap-around".
    period = datetime.timedelta(days=1000000)
    if group_by == 'day':
        period = datetime.timedelta(days=1)
    elif group_by == 'week':
        period = datetime.timedelta(weeks=1)
    sample_width = datetime.timedelta(minutes=sample_width)
    group_index = 0
    first_round = True

    for row in data:
        datetime_current_row = time.datetime_from_row(row, cutoff)
        # Is it time to start adding data from the beginning of the period again?
        if datetime_current_row - beginning_of_period >= period:
            first_round = False
            beginning_of_period += period
            while datetime_current_row - beginning_of_period >= period:
                beginning_of_period += period
            group_index = math.floor(
                (datetime_current_row - beginning_of_period) / sample_width)
        # Did we just go outside the current sample? A sample is e.g. a single box in a box plot.
        new_index = math.floor(
            (datetime_current_row - beginning_of_period) / sample_width)
        if new_index != group_index:
            group_index = new_index
            if first_round:
                # Grow the list far enough to cover any gap in the data.
                while len(groups) <= group_index:
                    groups.append([])
            if len(groups[group_index]) < 1:
                groups[group_index] = [[] for _ in range(5)]
        for i in range(5):
            groups[group_index][i].append(row[i])

    return groups
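
The sample index above is just integer division of elapsed time by the sample width. Here is a minimal sketch of that arithmetic in isolation; `datetime_from_row` is a hypothetical stand-in for the project's `time.datetime_from_row` helper, and the row layout (an ISO-8601 timestamp followed by four metric values) is an assumption.

import datetime
import math

# Hypothetical stand-in: assume the first field of a row is an
# ISO-8601 timestamp string.
def datetime_from_row(row):
    return datetime.datetime.fromisoformat(row[0])

rows = [
    ['2024-01-01T00:05:00', 21.3, 45.0, 1013.0, 0.0],
    ['2024-01-01T00:20:00', 21.5, 44.8, 1012.9, 0.0],
    ['2024-01-01T00:35:00', 21.6, 44.5, 1012.7, 0.0],
]

sample_width = datetime.timedelta(minutes=15)
start = datetime_from_row(rows[0])
for row in rows:
    # timedelta / timedelta yields a float number of sample widths.
    index = math.floor((datetime_from_row(row) - start) / sample_width)
    print(index)  # prints 0, then 1, then 2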
Example #2
def filter_weekends(rows, include):
    """Either only include weekends or entirely exclude them."""
    # XOR flips the filter: include=True keeps rows whose date is NOT a
    # business day (weekends); include=False keeps business-day rows.
    return [
        row for row in rows
        if include ^ numpy.is_busday(time.datetime_from_row(row).date())
    ]
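
The XOR lets one boolean flag flip the filter in both directions. A minimal, self-contained sketch of the same trick on plain dates (the dates and flag name are invented for illustration):

import datetime
import numpy

dates = [datetime.date(2024, 1, d) for d in range(1, 8)]  # Mon 1st .. Sun 7th

include = True
# include=True keeps non-business days (weekends); include=False keeps
# business days.
kept = [d for d in dates if include ^ numpy.is_busday(d)]
print(kept)  # [datetime.date(2024, 1, 6), datetime.date(2024, 1, 7)]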
Example #3
def fetch(config, url):
    """Fetches and stores metrics from Sensor at the URL given."""
    new_path = os.path.join(
        config.data_folder,
        datetime.datetime.now().strftime('%Y-%m-%d_%H:%M:%S.csv'))
    new_temp_path = new_path + 'temp'
    if not url.startswith('http://'):
        url = 'http://' + url
    url += '/export.csv'

    if config.verbose:
        click.echo('Fetching data from ' + url + ' and saving it in ' +
                   new_temp_path)

    try:
        previous_path = sorted(glob.glob(config.data_folder + '/*.csv'))[-1]
    except IndexError:
        previous_path = None

    # HTTPError is a subclass of URLError, so it must be caught first;
    # otherwise the URLError handler would swallow it.
    try:
        urllib.request.urlretrieve(url, new_temp_path)
    except urllib.error.HTTPError as e:
        click.echo('Managed to connect but failed with HTTP error code: ' +
                   str(e.code))
        click.echo(e.reason)
        sys.exit(2)
    except urllib.error.URLError as e:
        click.echo('Failed to establish an HTTP connection.')
        click.echo(e.reason)
        sys.exit(1)

    try:
        new_rows = csvio.loadOne(new_temp_path)
        if not new_rows[0][0] == "Device:":
            click.echo('Managed to connect and fetch data from something, '
                       'but it was not a CSV from a Comet Web Sensor.')
            click.echo(new_rows[0][0])
            sys.exit(3)

        # Here we'll try to remove data points that overlap with the last
        # file. It gets nasty due to time adjustments done by the sensor.
        if previous_path is not None:
            previous_rows = csvio.loadOne(previous_path)
            data_start = data.get_first_data_point_index(previous_rows)
            time_of_newest_data_in_previous = time.datetime_from_row(
                previous_rows[data_start], 'second')
            filtered_rows = []
            for row in new_rows:
                if data.not_data_point(row):
                    continue
                time_of_row = time.datetime_from_row(row)
                if time_of_newest_data_in_previous < time_of_row:
                    filtered_rows.append(row)

            if not filtered_rows:
                if config.verbose:
                    click.echo('No new rows found in fetched data.')
                sys.exit(0)
        else:
            filtered_rows = new_rows

        if config.verbose:
            click.echo('Rewriting treated CSV to: ' + new_path)
        csvio.writeRows(filtered_rows, new_path)
    finally:
        os.remove(new_temp_path)
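
The function downloads to a temporary path, validates the payload, and only then writes the final file, so a bad fetch never leaves a broken CSV in the data folder. A minimal sketch of that fetch-validate-commit pattern, with a placeholder validation check and paths that are not the project's real ones:

import os
import urllib.request

def fetch_csv(url, final_path):
    temp_path = final_path + '.temp'
    urllib.request.urlretrieve(url, temp_path)
    try:
        with open(temp_path, newline='') as f:
            first_line = f.readline()
        if not first_line.startswith('Device:'):
            raise ValueError('not a Comet Web Sensor export')
        # Atomic on the same filesystem, so readers never see a partial file.
        os.replace(temp_path, final_path)
    finally:
        if os.path.exists(temp_path):
            os.remove(temp_path)

Unlike the original, which rewrites a filtered copy with csvio.writeRows, this sketch simply renames the validated download into place.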