Beispiel #1
0
def get_capacity_data():
    """Collect per-user average capacity for tests taken at ``speed = 100``.

    Capacity is ``avg(ground_truth_rate / speed)`` per ``user_name`` in
    ``valid_tests``. The same aggregate is computed for several user groups,
    each defined by membership in a ``*_users`` relation.

    Returns:
        A ``defaultdict(list)`` keyed by group name (``"partner"``, ``"hot"``,
        ``"bezeq"``, ``"all"``, ``"not_hot"``); each value is a list of
        ``(user_name, capacity)`` tuples. A group whose query returns no rows
        gets no key.
    """
    # (result key, extra condition appended to the ``speed = 100`` filter)
    group_filters = [
        ("partner", "and user_name in (select * from partner_users)"),
        ("hot", "and user_name in (select * from hot_users)"),
        ("bezeq", "and user_name in (select * from bezeq_users)"),
        ("all", ""),
        # The original query excluded ``bezeq_users`` here although both the
        # key and the comment say "not HOT"; exclude ``hot_users`` instead.
        ("not_hot", "and user_name not in (select * from hot_users)"),
    ]

    result = defaultdict(list)
    engine = get_engine()
    cur = engine.cursor()

    for group, extra_condition in group_filters:
        cur.execute(f"""
            select user_name, avg(ground_truth_rate / speed) capacity
            from valid_tests
            where speed = 100
            {extra_condition}
            group by user_name
            """)
        for user_name, capacity in cur.fetchall():
            result[group].append((user_name, capacity))

    return result
Beispiel #2
0
def get_household_data() -> List[Dict[str, float]]:
    """Pair each LAN user's capacity with the size of their household.

    Joins per-user, per-speed capacity (``avg(ground_truth_rate) / speed``
    over LAN tests) with the household size recorded in ``testers``.

    Returns:
        One dict per joined row with the keys ``NUM_USERS`` (household size)
        and ``CAPACITY``, both converted to ``float``.
    """
    db_cursor = get_engine().cursor()
    household_query = """
        with user_household_sizes as (
            select "יוזר" as user_name, "מס_נפשות_בבית"::int as "household_size"
            from testers
            where "מס_נפשות_בבית" is not null
            order by "מס_נפשות_בבית"::int desc
        ),

        user_capacities as (
            select user_name, avg(ground_truth_rate) / speed capacity
            from valid_tests
            where connection = 'LAN'
            group by user_name, speed
        )

        select user_capacities.user_name, capacity, household_size 
        from user_capacities
        join user_household_sizes on user_capacities.user_name = user_household_sizes.user_name
        ;
        """
    db_cursor.execute(household_query)

    # The user name is selected but not needed in the output.
    return [
        {
            NUM_USERS: float(household_size),
            CAPACITY: float(capacity),
        }
        for _user_name, capacity, household_size in db_cursor.fetchall()
    ]
Beispiel #3
0
def main():
    """Parse the command line and run the selected engine's ``install``.

    Options:
        --cache         store_true flag, available as ``args.cache``.
        --engine / -e   engine name, defaults to ``'default'``.

    If the engine name is not among ``utils.get_engines()``, an error line is
    printed and nothing is installed.
    """
    parser = argparse.ArgumentParser(prog="download",
                                     usage="%(prog)s [options] [paths...]\n",
                                     add_help=True)
    # Named ``main_group`` so the local no longer shadows this function.
    main_group = parser.add_argument_group("Main")
    main_group.add_argument('--cache', help='Cache package', dest='cache',
                            action='store_true')
    parser.add_argument('--engine', '-e', help='foo help', dest='engine',
                        default='default')

    args = parser.parse_args()

    if args.engine in utils.get_engines():
        # get_engine() returns a class/factory; instantiate it, then install.
        utils.get_engine(args.engine)().install(args)
    else:
        # Call form of print: valid on Python 3 and prints the same text on
        # Python 2 for a single string argument.
        print('Error: unsupported engine.')
Beispiel #4
0
def get_user_tests_in_time_interval_evening() -> Dict[str, List[TestResult]]:
    """Load the evening random sample.

    Selects ``user_name, result, speed, infra, isp`` from
    ``test_random_sample_evening`` and passes the executed cursor to
    ``fetch_sample_results``, whose result is returned unchanged.
    """
    db_cursor: cursor = get_engine().cursor()
    evening_sample_query = """
        select user_name, result, speed, infra, isp
        from test_random_sample_evening
        ;   
        """
    db_cursor.execute(evening_sample_query)
    return fetch_sample_results(db_cursor)
Beispiel #5
0
def count_users(vendor: VendorUsers, speed: int) -> int:
    """Count the distinct users of one vendor that have tests at ``speed``.

    Args:
        vendor: enum member whose ``value`` names the relation that lists the
            vendor's users.
        speed: value matched against the ``speed`` column of ``valid_tests``.

    Returns:
        The first column of the first result row (the ``count(distinct ...)``).
    """
    db_cursor: cursor = get_engine().cursor()
    distinct_users_query = f"""
        select count(distinct user_name)
        from valid_tests
        where user_name in (select * from {vendor.value})
        and speed = {speed}
        ;
        """
    db_cursor.execute(distinct_users_query)
    rows = db_cursor.fetchall()
    first_row = rows[0]
    return first_row[0]
Beispiel #6
0
def get_ground_truth_speeds_by_vendor(vendor: VendorUsers, speed: int):
    """Return the ground-truth rates of a vendor's LAN tests at ``speed``.

    Args:
        vendor: enum member whose ``value`` names the relation that lists the
            vendor's users.
        speed: value matched against the ``speed`` column of ``valid_tests``.

    Returns:
        A list holding the ``ground_truth_rate`` of every matching row.
    """
    db_cursor: cursor = get_engine().cursor()
    lan_rates_query = f"""
        select ground_truth_rate
        from valid_tests
        where speed = {speed}
        and user_name in (select * from {vendor.value})
        and connection = 'LAN'
        ;
        """
    db_cursor.execute(lan_rates_query)

    rates = []
    for record in db_cursor.fetchall():
        rates.append(record[0])
    return rates
Beispiel #7
0
async def update_schema_for(payload: Dict[str, Any], webhook: str):
    """Create the database tables needed to store a webhook payload.

    Args:
        payload: decoded webhook body.
        webhook: webhook type, forwarded to ``extract_github_objects``.
    """
    orm_base = declarative_base()

    # Turn the JSON payload into (name, object) pairs.
    extracted = extract_github_objects(payload, webhook)

    # Registration must happen before create_all(): generating the ORM
    # classes is what adds their tables to the base's metadata.
    for object_name, github_object in extracted:
        generate_orm(object_name, github_object, orm_base)

    db_engine = get_engine(connection_string())
    orm_base.metadata.create_all(db_engine)
def read_users():
    """Return the distinct values of the user column of ``testers``.

    Returns:
        A list with the first (and only) selected column of every row.
    """
    db_cursor = utils.get_engine().cursor()
    db_cursor.execute('''
        select distinct
                        "יוזר"
        from testers;
    ''')
    return [record[0] for record in db_cursor.fetchall()]
Beispiel #9
0
def get_speed_test_ratios(speed_test: str) -> List[float]:
    """Return ground-truth / speed-test rate ratios for one speed-test website.

    Only classic tests on classic resources with non-zero rates are
    considered, and ratios outside ``[0.01, 100]`` are discarded by the query.

    Args:
        speed_test: value matched against the ``website`` column.

    Returns:
        A list with the ``ratio`` column of every matching row.
    """
    postgres_engine = get_engine()
    cur: cursor = postgres_engine.cursor()
    # The website name is bound as a query parameter instead of being spliced
    # into the SQL text, so quotes in the value cannot break or alter the
    # statement.
    # NOTE(review): assumes a psycopg2-style driver (``%s`` placeholders).
    cur.execute(
        """
        select ground_truth_rate / speed_test_rate as ratio
        from valid_tests
        where true_or_null(is_classic_test)
        and is_classic_resource(file_name)
        and speed_test_rate != 0 and ground_truth_rate != 0
        and website  = %s
        and (ground_truth_rate / speed_test_rate) between 0.01 and 100
        ;
        """, (speed_test, ))
    return [row[0] for row in cur.fetchall()]
Beispiel #10
0
def calculate_ci_stats_for_user_group(user_group: List[UserStats],
                                      vendor: Vendor,
                                      tests: Dict[str, List[TestResult]],
                                      k: int, default_rates: List[float],
                                      pure: bool, evening: bool):
    """Compute confidence intervals for a user group, then report and store them.

    Steps, in order: compute the per-user CI results, print the number and
    share of "defaulted" users for every ratio in ``default_rates``, copy the
    results (sorted by upper bound) into the matching ``*_ci`` table and
    upload the same data to the matching spreadsheet.

    Args:
        user_group: users to analyse.
        vendor: vendor whose ``infra`` names the printed lines and the table.
        tests: tests per user, flattened for the group via ``flatten_tests``.
        k: forwarded to ``calcuate_ci_for_user_group``.
        default_rates: ratios passed to
            ``count_defaulted_users_by_upper_bound``.
        pure: when ``True`` the target table gets a ``pure_`` prefix.
        evening: when ``True`` the target table gets an ``_evening`` suffix
            and printed lines end with `` (evening)``.
    """
    sampled_tests = flatten_tests(user_group, tests)
    ci_users = calcuate_ci_for_user_group(user_group, sampled_tests, k)

    if evening is True:
        suffix = " (evening)"
    else:
        suffix = ""

    infra_title = vendor.infra.capitalize()
    print(f"{infra_title} users (n={len(user_group)})" + suffix)

    for def_rate in default_rates:
        defaulted_users = count_defaulted_users_by_upper_bound(
            ci_users, def_rate)
        defaulted_line = (
            f"defaulted {infra_title} with default ratio of: {def_rate}:"
            f"\n            {defaulted_users}")
        print(defaulted_line + suffix)
        rate_line = f"{infra_title} default rate: {defaulted_users / len(user_group)}"
        print(rate_line + suffix)
        print()

    # Column order of both the CSV export and the spreadsheet.
    columns = [
        USER_NAME_HEBREW_KEY,
        USER_SPEED_PROGRAM_KEY_HEBREW,
        ISP_KEY_HEBREW,
        INFRASTRUCTURE_KEY_HEBREW,
        SAMPLE_AVERAGE_SPEED_KEY_HEBREW,
        LOWER_BOUND_KEY_HEBREW,
        UPPER_BOUND_KEY_HEBREW,
        CONFIDENCE_LEVEL_KEY_HEBREW,
    ]

    records = [ci_user.to_dict() for ci_user in ci_users]
    unsorted_frame = pd.DataFrame.from_records(records, columns=columns)
    data = unsorted_frame.sort_values(UPPER_BOUND_KEY_HEBREW)

    csv_no_header = data.to_csv(sep=",",
                                columns=columns,
                                index=False,
                                header=False)

    # Table name: [pure_]<infra>_ci[_evening]
    name_parts = [vendor.infra.lower(), "_ci"]
    if evening is True:
        name_parts.append("_evening")
    if pure is True:
        name_parts.insert(0, "pure_")
    ci_table_name = "".join(name_parts)

    print(f"copying to: '{ci_table_name}'")
    csv_buffer = StringIO(csv_no_header)
    copy_csv_to_table(csv_buffer, get_engine(), ci_table_name)

    update_sheet(
        get_sheet_title(vendor, is_pure=pure, is_evening=evening),
        data,
    )
Beispiel #11
0
async def update_schema_for(payload: Dict[str, Any], webhook: str):
    """Create the database tables needed to store an allowlisted webhook.

    Args:
        payload: decoded webhook body.
        webhook: webhook type; must be in ``ACCEPTABLE_WEBHOOKS``.

    Returns:
        A ``{"statusCode": 200, "body": ...}`` dict when the webhook is not
        allowlisted; otherwise ``None`` after the tables were created.
    """
    orm_base = declarative_base()

    # Webhooks outside the allowlist are acknowledged but not processed.
    if webhook not in ACCEPTABLE_WEBHOOKS:
        skipped_body = f"not processing {webhook}"
        return {"statusCode": 200, "body": skipped_body}

    # Turn the JSON payload into (name, object) pairs.
    extracted = extract_github_objects(payload, webhook)

    # Registration must happen before create_all(): generating the ORM
    # classes is what adds their tables to the base's metadata.
    for object_name, github_object in extracted:
        generate_orm(object_name, github_object, orm_base)

    db_engine = get_engine(connection_string())
    orm_base.metadata.create_all(db_engine)
Beispiel #12
0
def calc_intervals_speed_test_website_comparisons():
    """Print ratio statistics and confidence intervals per speed-test website.

    For every website in the hard-coded list, the rates returned by the query
    from ``get_speed_test_websites_rates`` are summarised (mean, sample
    standard deviation, sample count) and confidence intervals are printed
    for the 95%, 99% and 99.9% levels. All output goes to stdout.
    """
    postgres_engine = get_engine()
    cur: cursor = postgres_engine.cursor()
    confidence_levels = [.95, .99, .999]
    for website in ["גוגל", "בזק", "אוקלה", "הוט", "נטפליקס"]:
        cur.execute(get_speed_test_websites_rates(website))
        rates = [row[0] for row in cur.fetchall()]
        print("רווח סמך עבור אתר בדיקת מהירות: {}".format(website))
        print("יחס ממוצע: {}".format(round(np.mean(rates), DECIMAL_PLACES)))
        # ddof=1 -> sample (not population) standard deviation.
        print("סטיית תקן (מדגם): {}".format(
            round(np.std(rates, ddof=1), DECIMAL_PLACES)))
        print("מספר דגימות (N): {}".format(len(rates)))
        for confidence in confidence_levels:
            # Only the bounds are reported; the other two values are unused.
            _mean, lower_bound, upper_bound, _h = calc_confidence_interval(
                rates, confidence)
            lower_bound = round(lower_bound, DECIMAL_PLACES)
            upper_bound = round(upper_bound, DECIMAL_PLACES)
            # The backslash is escaped so the literal no longer contains the
            # invalid escape sequence "\ "; the printed text is unchanged.
            print(
                "ברמת סמך של {}% יחס מהירות בפועל \\ למהירות בדיקה באתר בדיקת המהירות של {} הוא בין {} ל-{}"
                .format(confidence * 100, website, lower_bound, upper_bound))
        print()
Beispiel #13
0
async def handle_webhook(payload: Dict[str, Any], type: str):
    """Upsert the objects contained in an allowlisted webhook payload.

    Args:
        payload: decoded webhook body.
        type: webhook type; must be in ``ACCEPTABLE_WEBHOOKS``.

    Returns:
        A ``{"statusCode": 200, "body": ...}`` dict; the body is ``"ok"``
        after writing, or a "not processing" note for other webhook types.
    """
    db_engine = get_engine(connection_string())

    # Webhooks outside the allowlist are acknowledged but not processed.
    if type not in ACCEPTABLE_WEBHOOKS:
        skipped_body = f"not processing {type}"
        return {"statusCode": 200, "body": skipped_body}

    # Turn the JSON payload into (table name, object) pairs.
    extracted = extract_github_objects(payload, type)

    table_names = ", ".join(name for name, _ in extracted)
    print("Writing", table_names)

    with db_engine.connect() as conn:
        for tablename, raw_obj in extracted:
            # Dates, lists and similar values need converting before they
            # can be written.
            row = transform_data(raw_obj)

            # Lightweight table description: its name plus one column per key.
            row_columns = [column(key) for key in row.keys()]
            model = table(tablename, *row_columns)
            upsert(conn, model, row)

    return {"statusCode": 200, "body": "ok"}
Beispiel #14
0
async def handle_webhook(payload: Dict[str, Any], type: str):
    """Upsert webhook objects, restricted to the hard-coded schema.

    Objects whose table is missing from ``existing_schema`` are skipped, and
    keys that the table does not list are dropped; both cases are printed.

    Args:
        payload: decoded webhook body.
        type: webhook type, forwarded to ``extract_github_objects``.

    Returns:
        ``{"statusCode": 200, "body": "ok"}``.
    """
    db_engine = get_engine(connection_string())

    # Turn the JSON payload into (table name, object) pairs.
    extracted = extract_github_objects(payload, type)

    table_names = ", ".join(name for name, _ in extracted)
    print("Writing", table_names)

    with db_engine.connect() as conn:
        for tablename, raw_obj in extracted:
            # Dates, lists and similar values need converting before they
            # can be written.
            row = transform_data(raw_obj)

            # The model is described from the unfiltered keys.
            row_columns = [column(key) for key in row.keys()]
            model = table(tablename, *row_columns)

            if tablename not in existing_schema:
                print(
                    f"Skipping write of {tablename} since it doesn't exist in hardcoded schema"
                )
                continue

            # Keep only the fields the table actually has.
            kept_fields = {}
            for key, value in row.items():
                if key not in existing_schema[tablename]:
                    print(
                        f"Dropping key '{key}' with value '{value}' since it doesn't exist in table {tablename}"
                    )
                    continue
                kept_fields[key] = value

            upsert(conn, model, kept_fields)

    return {"statusCode": 200, "body": "ok"}
Beispiel #15
0
def get_new_data():
    """Download the current metrobus data and store it as historical points.

    Fetches ``METROBUSES_API_URL``, reduces the raw response with
    ``filter_json_raw_data`` and passes the result, together with the engine,
    to ``create_historical_points``.
    """
    # Local import keeps this block self-contained; closing() works whether
    # or not the response object is itself a context manager.
    from contextlib import closing

    engine = get_engine()
    # Close the HTTP response explicitly instead of leaving it to the
    # garbage collector.
    with closing(urlopen(METROBUSES_API_URL)) as response:
        metrobuses_raw_data = response.read()
    metrobuses_required_data = filter_json_raw_data(metrobuses_raw_data)
    create_historical_points(engine, metrobuses_required_data)
from keras.layers import Dense, Input
from keras.models import Model
from keras.wrappers.scikit_learn import KerasRegressor, KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from utils import get_engine
from sklearn import linear_model
import matplotlib.pyplot as plt

# Script entry point: loads a small sample of the fraud table and prepares
# the feature matrix X and the label vector Y.
if __name__ == '__main__':

    # Load the dataset; the query is limited to 10 rows.
    table = "data_fraud_little"
    engine = get_engine()
    dataframe = pd.read_sql_query("select * from {table} limit 10".format(table=table),engine)
    dataset = dataframe.values
    # The slice below prints up to two rows, although the message says one.
    print("First one row of the dataset")
    print("Shape [{}]".format(dataset.shape))
    print(dataset[0:2,:])
    # Split into input (X) and output (Y) variables.
    data_dimensions = 45
    # Column 0 is skipped (per the original note it holds the index);
    # columns 1..44 are the inputs and column 45 is the label.
    X = dataset[:, 1:data_dimensions]
    Y = dataset[:, data_dimensions]

    # Share of rows whose label equals 1, as a percentage.
    print("Fraud {}% ".format(float(np.sum(Y==1))*100.0/Y.shape[0]))
    print("Total #samples:",Y.shape[0])
    # NOTE(review): `nb_classes` looks like the Keras 1.x keyword (later
    # releases call it `num_classes`) - confirm against the installed version.
    Y = to_categorical(Y, nb_classes=None)
Beispiel #17
0
    id = Column(Integer, primary_key=True)
    name = Column(String(50), nullable=False, unique=True)


# post model:
class Post(Base):
    """Declarative model mapped to the ``posts`` table."""
    __tablename__ = 'posts'

    # Primary key.
    id = Column(Integer, primary_key=True)
    # Required title, at most 100 characters.
    title = Column(String(100), nullable=False)
    # Required body text.
    content = Column(Text, nullable=False)
    # `datetime.now` is passed as a callable (not called here).
    # NOTE(review): it yields naive local time - confirm that is intended.
    created_on = Column(DateTime, default=datetime.now)
    # Same default as `created_on`, plus an `onupdate` callable.
    updated_on = Column(DateTime, default=datetime.now, onupdate=datetime.now)
    # Foreign keys into `categories.id` and `authors.id`; both are nullable
    # because `nullable=False` is not set.
    category_id = Column(Integer, ForeignKey('categories.id'))
    author_id = Column(Integer, ForeignKey('authors.id'))
    # Relationship to `Tag` through `post_tag` (presumably an association
    # table defined elsewhere); also adds a `posts` backref on `Tag`.
    tags = relationship('Tag', secondary=post_tag, backref='posts')


# Command-line entry point: `create` builds all model tables, `drop` removes
# them; anything else (including no argument) reports an unknown action.
if __name__ == '__main__':
    # default message
    msg = 'action not found!'
    # Get the action from the command line. A missing argument is treated as
    # an unknown action instead of raising IndexError.
    action = sys.argv[1] if len(sys.argv) > 1 else None
    if action == 'create':
        Base.metadata.create_all(get_engine())
        msg = 'models created successfully.'
    elif action == 'drop':
        Base.metadata.drop_all(get_engine())
        msg = 'models destroyed.'
    print(msg)
Beispiel #18
0
from typing import List, Union, Optional, Generator
from ipwhois import IPWhois
from utils import get_engine, get_rows

# DDL for the whois cache table. `create table if not exists` makes it safe
# to run repeatedly; `unique (cidr)` allows at most one row per CIDR value.
CREATE_WHOIS_CACHE = """
    create table if not exists whois_data (
        cidr cidr,
        reference_filename text,
        data json,
        last_updated timestamptz,
        unique (cidr)
    );
"""

# Import-time side effect: importing this module obtains a connection,
# runs the DDL above and commits it.
engine = get_engine()
engine.cursor().execute(CREATE_WHOIS_CACHE)
engine.commit()


# NOTE(review): results are memoised by `ttl_cache`; what the argument 1000
# configures depends on the cache implementation in use - confirm.
@ttl_cache(1000)
def whois_lookup(ip: str,
                 file_name: str,
                 use_cache=True) -> Generator[None, None, Optional[dict]]:
    """Perform a Whois lookup for a given IP address.

    :param ip: IP address to look up; parsed with ``ipaddress.ip_address``.
    :param file_name: not used in the part of the body visible here.
    :param use_cache: not used in the part of the body visible here.
    :returns: annotated as a generator whose return value is an
        ``Optional[dict]`` holding the whois data.

    NOTE(review): only the private-address check is visible in this view;
    confirm the rest of the body against the full file.
    """
    ip_obj = ipaddress.ip_address(ip)
    # Private addresses are reported on stdout.
    if ip_obj.is_private is True:
        print(ip_obj, "is private")
class TestDataValidity(unittest.TestCase):
    """Checks of the analysis tables against pinned expectations.

    Every test reads from the database behind ``get_engine()`` (or from the
    summary spreadsheets via ``read_sheet``) and compares the content with
    values hard-coded in this class.
    """
    # Connection and cursor are created once, when the class body is
    # executed, and shared by all tests.
    engine = get_engine()
    cur = engine.cursor()

    def get_rows(self, query: str) -> List[tuple]:
        """Execute ``query`` on the shared cursor and return all rows as a list."""
        rows = []
        self.cur.execute(query)
        for row in self.cur.fetchall():
            rows.append(row)
        return rows

    def test_ci_table_vendors(self):
        # Every row of a vendor's CI tables (regular and evening) must carry
        # either that vendor's ISP name or that vendor's infrastructure.
        bezeq_vendors = self.get_rows("""
            select 
                   bezeq_ci.ספקית,
                   bezeq_ci.תשתית
            from bezeq_ci
            ;
            """)
        bezeq_vendors += self.get_rows("""
            select 
                   bezeq_ci_evening.ספקית,
                   bezeq_ci_evening.תשתית
            from bezeq_ci_evening
            ;
            """)

        for isp, infra in bezeq_vendors:
            self.assertTrue(isp == "Bezeq International-Ltd"
                            or infra == "BEZEQ")

        hot_vendors = self.get_rows("""
            select 
                   hot_ci.ספקית,
                   hot_ci.תשתית
            from hot_ci
            ;
            """)

        hot_vendors += self.get_rows("""
            select 
                   hot_ci_evening.ספקית,
                   hot_ci_evening.תשתית
            from hot_ci_evening
            ;
            """)

        for isp, infra in hot_vendors:
            self.assertTrue(isp == "Hot-Net internet services Ltd."
                            or infra == "HOT")

        partner_vendors = self.get_rows("""
                  select 
                         partner_ci.ספקית,
                         partner_ci.תשתית
                  from partner_ci
                  ;
                  """)

        partner_vendors += self.get_rows("""
                       select 
                              partner_ci_evening.ספקית,
                              partner_ci_evening.תשתית
                       from partner_ci_evening
                       ;
                       """)

        for isp, infra in partner_vendors:
            self.assertTrue(isp == "Partner Communications Ltd."
                            or infra == "PARTNER")

    def test_ci_evening_tables(self):
        # The evening and regular Partner CI tables must list the same users
        # (after sorting by user name) with different average/lower/upper
        # values.
        partner_evening_numbers = self.get_rows("""
                       select 
                              partner_ci_evening.שם_משתמש,
                              partner_ci_evening.גבול_עליון,
                              partner_ci_evening.גבול_תחתון,
                              partner_ci_evening.מהירות_ממוצעת_מדגם
                       from partner_ci_evening
                       ;
        """)

        partner_numbers = self.get_rows("""
                       select 
                        partner_ci.שם_משתמש,      
                              partner_ci.גבול_עליון,
                              partner_ci.גבול_תחתון,
                              partner_ci.מהירות_ממוצעת_מדגם
                       from partner_ci
                       ;
        """)

        partner_evening_numbers.sort(key=lambda columns: columns[0])
        partner_numbers.sort(key=lambda columns: columns[0])

        # NOTE(review): zip() stops at the shorter list, so a difference in
        # row counts between the two tables goes unnoticed here.
        for eve_num, num in zip(partner_evening_numbers, partner_numbers):
            eve_name, eve_upper, eve_lower, eve_average = eve_num
            name, upper, lower, average = num
            self.assertEqual(eve_name, name)
            self.assertNotEqual(eve_average, average)
            self.assertNotEqual(eve_lower, lower)
            self.assertNotEqual(eve_upper, upper)

    def test_random_sample(self):
        # Each user must have exactly 300 sampled tests.
        random_sample = self.get_rows(
            "select user_name, count(*) from test_random_sample group by user_name"
        )
        for _user_name, count in random_sample:
            self.assertEqual(300, count)

        # Only these speed values may appear in the sample.
        speeds = self.get_rows("select speed from test_random_sample")
        for speed, in speeds:
            self.assertIn(speed, [100, 200, 500, 1000])

        # random sample data persistency
        # The rows below pin the randomized table to specific content, so a
        # change in the generated order makes these assertions fail.
        randomized_valid_test = self.get_rows(
            "select ground_truth_rate, user_name, file_name, timestamp, random_index"
            " from randomized_valid_tests order by random_index")

        second = (6.437308051433778, 'ben_b', 'go', 1540301748578,
                  1.1055979456386922e-06)
        last_row = (63.250679088980895, 'etl', 'firefox', 1571828370256,
                    0.9999999137277591)
        row_minus_pi = (46.146673387096776, 'ArielG', 'go', 1543489131958,
                        0.8610400669148355)
        row_minus_1000 = (30.569313143358247, 'dor_p', 'dlink', 1563453594052,
                          0.9995655430256711)

        self.assertEqual(second, randomized_valid_test[1])
        self.assertEqual(last_row, randomized_valid_test[-1])
        self.assertEqual(row_minus_pi, randomized_valid_test[-314159])
        self.assertEqual(row_minus_1000, randomized_valid_test[-1000])

        # Same kind of pinning for the sample itself, ordered by timestamp.
        sample = self.get_rows("select user_name, result, timestamp, file_name"
                               " from test_random_sample order by timestamp")
        first_row = ('admin', 18.813411540900443, 1529870019339, 'my-sql')
        last_row = ('dan_florentin', 36.70050094950409, 1587377434453, 'dlink')
        row_minus_1000 = ('artium', 37.905948297764226, 1584833539085, 'dlink')

        self.assertEqual(first_row, sample[0])
        self.assertEqual(last_row, sample[-1])
        self.assertEqual(row_minus_1000, sample[-1000])

        # Every sampled file name must be one of these values.
        public_access_resources = [
            "file_name", "amazon-workSpaces", "windows-games",
            "windows-games-studio", "my-sql", "dlink", "vlc", "go", "firefox",
            "quicktime"
        ]

        for _, _, _, filename in sample:
            self.assertIn(filename, public_access_resources)

    def test_evening_random_sample(self):
        # Every timestamp of the evening sample must be classified by the
        # database function get_time_of_day() as containing "Evening".
        # This issues one query per timestamp.
        evening_timestamps = self.get_rows(
            "select timestamp from test_random_sample_evening")
        for timestamp, in evening_timestamps:
            self.assertIn(
                "Evening",
                self.get_rows(f"select get_time_of_day({timestamp})")[0][0])

        # Each user must have exactly 300 evening tests.
        random_sample_evening = self.get_rows(
            "select user_name, count(*) from test_random_sample_evening group by user_name"
        )
        for _user_name, count in random_sample_evening:
            self.assertEqual(300, count,
                             f"user: {_user_name} has {count} tests")

        # Only these speed values may appear in the evening sample.
        speeds = self.get_rows("select speed from test_random_sample_evening")
        for speed, in speeds:
            self.assertIn(speed, [100, 200, 500, 1000])

    def test_ci_tables(self):
        # Test CI Tables data persistency
        # All expected values are strings, including the numeric ones.
        # NOTE(review): the queries have no ORDER BY, so these checks rely on
        # the order in which the database returns the rows - confirm that
        # this order is stable.
        # NOTE(review): the "*_tenth*" expectations are compared with index
        # 10, i.e. the eleventh row.
        bezeq_ci = self.get_rows("select * from bezeq_ci")
        bezeq_ci_first_row = ('yarden', '100',
                              'Cellcom Fixed Line Communication L.P.', 'BEZEQ',
                              '24.17307459253051', '22.419327471344598',
                              '25.92682171371644', '0.9989583333333333')
        bezeq_ci_last_row = ('raz', '100', 'Hot-Net internet services Ltd.',
                             'BEZEQ', '66.30938669799932',
                             '61.388106035193466', '71.23066736080521',
                             '0.9989583333333333')
        bezeq_ci_tenth_row = ('nimrod', '100', '013 NetVision Ltd', 'BEZEQ',
                              '31.192815980664005', '29.223790648991308',
                              '33.16184131233672', '0.9989583333333333')

        self.assertEqual(bezeq_ci_first_row, bezeq_ci[0])
        self.assertEqual(bezeq_ci_last_row, bezeq_ci[-1])
        self.assertEqual(bezeq_ci_tenth_row, bezeq_ci[10])

        pure_bezeq_ci = self.get_rows("select * from pure_bezeq_ci")
        pure_bezeq_ci_first = ('michael', '100', 'Bezeq International-Ltd',
                               'BEZEQ', '29.66160497821917',
                               '29.077337604229974', '30.245872352208366',
                               '0.9970588235294118')
        pure_bezeq_ci_last = ('alon', '100', 'Bezeq International-Ltd',
                              'BEZEQ', '64.8046254918595', '60.96868027095032',
                              '68.64057071276866', '0.9970588235294118')
        pure_bezeq_ci_tenth = ('rina', '100', 'Bezeq International-Ltd',
                               'BEZEQ', '52.732937250037054',
                               '48.413901608636756', '57.05197289143735',
                               '0.9970588235294118')

        self.assertEqual(pure_bezeq_ci_first, pure_bezeq_ci[0])
        self.assertEqual(pure_bezeq_ci_last, pure_bezeq_ci[-1])
        self.assertEqual(pure_bezeq_ci_tenth, pure_bezeq_ci[10])

        hot_ci = self.get_rows("select * from hot_ci")
        hot_ci_first_row = ('rom', '100', 'Hot-Net internet services Ltd.',
                            'HOT', '12.34002004187883', '10.041289542437283',
                            '14.638750541320395', '0.9985714285714286')
        hot_ci_last_row = ('barak', '100', '013 NetVision Ltd', 'HOT',
                           '114.77583966117818', '104.20295342751203',
                           '125.3487258948444', '0.9985714285714286')
        hot_ci_tenth_row = ('alon_s', '100', 'ITC NG ltd', 'HOT',
                            '49.53545118889848', '43.974820709604955',
                            '55.09608166819204', '0.9985714285714286')

        self.assertEqual(hot_ci_first_row, hot_ci[0])
        self.assertEqual(hot_ci_last_row, hot_ci[-1])
        self.assertEqual(hot_ci_tenth_row, hot_ci[10])

    def test_summary_ci_tables(self):
        # The two summary spreadsheets must equal the pinned tables below:
        # one row per vendor with the user count and four "count (percent)"
        # cells.
        isp_or_infra_summary_table = read_sheet(
            "ממצאי רווח סמך (ספקית או תשתית)")
        isp_and_infra_summary_table = read_sheet(
            "ממצאי רווח סמך (ספקית + תשתית)")

        expected_table_isp_or_infra = [{
            "ספקית או תשתית":
            "בזק",
            "מספר משתמשים":
            48,
            "מ. משתמשים שמהירות הגלישה הממוצעת שלהם נמוכה מחצי הבטחת החבילה":
            "23 (47.91%)",
            "מ. משתמשים שמהירות הגלישה הממוצעת שלהם נמוכה מחצי הבטחת החבילה בשעות הערב":
            "26 (54.16%)",
            "מ. משתמשים שמהירות הגלישה הממוצעת שלהם נמוכה משליש הבטחת החבילה":
            "11 (22.91%)",
            "מ. משתמשים שמהירות הגלישה הממוצעת שלהם נמוכה משליש הבטחת החבילה בשעות הערב":
            "11 (22.91%)"
        }, {
            "ספקית או תשתית":
            "הוט",
            "מספר משתמשים":
            35,
            "מ. משתמשים שמהירות הגלישה הממוצעת שלהם נמוכה מחצי הבטחת החבילה":
            "13 (37.14%)",
            "מ. משתמשים שמהירות הגלישה הממוצעת שלהם נמוכה מחצי הבטחת החבילה בשעות הערב":
            "15 (42.85%)",
            "מ. משתמשים שמהירות הגלישה הממוצעת שלהם נמוכה משליש הבטחת החבילה":
            "8 (22.85%)",
            "מ. משתמשים שמהירות הגלישה הממוצעת שלהם נמוכה משליש הבטחת החבילה בשעות הערב":
            "10 (28.57%)"
        }, {
            "ספקית או תשתית":
            "פרטנר",
            "מספר משתמשים":
            14,
            "מ. משתמשים שמהירות הגלישה הממוצעת שלהם נמוכה מחצי הבטחת החבילה":
            "7 (50.00%)",
            "מ. משתמשים שמהירות הגלישה הממוצעת שלהם נמוכה מחצי הבטחת החבילה בשעות הערב":
            "8 (57.14%)",
            "מ. משתמשים שמהירות הגלישה הממוצעת שלהם נמוכה משליש הבטחת החבילה":
            "2 (14.28%)",
            "מ. משתמשים שמהירות הגלישה הממוצעת שלהם נמוכה משליש הבטחת החבילה בשעות הערב":
            "2 (14.28%)"
        }]

        expected_table_isp_and_infra = [{
            "ספקית + תשתית":
            "בזק",
            "מספר משתמשים":
            17,
            "מ. משתמשים שמהירות הגלישה הממוצעת שלהם נמוכה מחצי הבטחת החבילה":
            "7 (41.17%)",
            "מ. משתמשים שמהירות הגלישה הממוצעת שלהם נמוכה מחצי הבטחת החבילה בשעות הערב":
            "7 (41.17%)",
            "מ. משתמשים שמהירות הגלישה הממוצעת שלהם נמוכה משליש הבטחת החבילה":
            "2 (11.76%)",
            "מ. משתמשים שמהירות הגלישה הממוצעת שלהם נמוכה משליש הבטחת החבילה בשעות הערב":
            "2 (11.76%)"
        }, {
            "ספקית + תשתית":
            "הוט",
            "מספר משתמשים":
            20,
            "מ. משתמשים שמהירות הגלישה הממוצעת שלהם נמוכה מחצי הבטחת החבילה":
            "8 (40.00%)",
            "מ. משתמשים שמהירות הגלישה הממוצעת שלהם נמוכה מחצי הבטחת החבילה בשעות הערב":
            "10 (50.00%)",
            "מ. משתמשים שמהירות הגלישה הממוצעת שלהם נמוכה משליש הבטחת החבילה":
            "5 (25.00%)",
            "מ. משתמשים שמהירות הגלישה הממוצעת שלהם נמוכה משליש הבטחת החבילה בשעות הערב":
            "6 (30.00%)"
        }, {
            "ספקית + תשתית":
            "פרטנר",
            "מספר משתמשים":
            6,
            "מ. משתמשים שמהירות הגלישה הממוצעת שלהם נמוכה מחצי הבטחת החבילה":
            "2 (33.33%)",
            "מ. משתמשים שמהירות הגלישה הממוצעת שלהם נמוכה מחצי הבטחת החבילה בשעות הערב":
            "3 (50.00%)",
            "מ. משתמשים שמהירות הגלישה הממוצעת שלהם נמוכה משליש הבטחת החבילה":
            "2 (33.33%)",
            "מ. משתמשים שמהירות הגלישה הממוצעת שלהם נמוכה משליש הבטחת החבילה בשעות הערב":
            "2 (33.33%)"
        }]

        self.assertEqual(expected_table_isp_or_infra,
                         isp_or_infra_summary_table)
        self.assertEqual(expected_table_isp_and_infra,
                         isp_and_infra_summary_table)
class CITablesInit:
    """Drops and recreates the confidence-interval (CI) result tables.

    All CI tables share one layout (eight ``text`` columns), so the DDL is
    generated from a single template instead of being repeated per table.
    """
    # Connection obtained once, when the class body is executed.
    engine = get_engine()

    # Tables are (re)created in exactly this order.
    _CI_TABLE_NAMES = (
        "bezeq_ci",
        "pure_bezeq_ci",
        "bezeq_ci_evening",
        "pure_bezeq_ci_evening",
        "hot_ci",
        "pure_hot_ci",
        "hot_ci_evening",
        "pure_hot_ci_evening",
        "partner_ci",
        "pure_partner_ci",
        "partner_ci_evening",
        "pure_partner_ci_evening",
    )

    # Column names shared by every CI table, in table order; all are `text`.
    _CI_COLUMN_NAMES = (
        "שם_משתמש",
        "תכנית",
        "ספקית",
        "תשתית",
        "מהירות_ממוצעת_מדגם",
        "גבול_תחתון",
        "גבול_עליון",
        "רמת_סמך",
    )

    def init_all_ci_tables(self):
        """Drop (if present) and recreate every CI table, then commit.

        Existing table contents are discarded. All statements are sent in a
        single ``execute`` call, followed by one commit.
        """
        column_defs = ",\n    ".join(
            f'"{column_name}" text' for column_name in self._CI_COLUMN_NAMES)
        statements = [
            f"DROP TABLE IF EXISTS {table_name};\n"
            f"CREATE TABLE {table_name} (\n    {column_defs}\n);"
            for table_name in self._CI_TABLE_NAMES
        ]
        self.engine.cursor().execute("\n\n".join(statements))
        self.engine.commit()
Beispiel #21
0
class RandomSample:
    """Build the per-user random test samples inside the database.

    Every method sends SQL through the object stored in the class attribute
    ``engine``. The SQL reads from ``valid_tests`` and relies on the database
    functions ``to_israel_dst_aware``, ``is_classic_resource`` and
    ``is_evening``.

    ``create_random_sample`` and ``create_random_sample_evening`` read from
    the ``user_stats`` view and the ``randomized_valid_tests`` table, so
    ``create_user_stats_view`` and ``create_randomized_valid_test_table``
    have to be run before them.
    """

    # Obtained once, when the class body is executed, and shared by all
    # instances. The methods only use its ``cursor()`` and ``commit()``.
    engine = get_engine()

    def execute_persistent(self, query):
        """Execute *query* and commit.

        Args:
            query: SQL text; callers in this class pass several
                ``;``-separated statements in one string.
        """
        cur = self.engine.cursor()
        try:
            cur.execute(query)
        finally:
            # Release the cursor whether or not the statement succeeded
            # (assumes a DB-API 2.0 style cursor exposing ``close()``).
            cur.close()
        self.engine.commit()

    def iterate_lines(self, query):
        """Run *query* and yield its result rows one by one.

        This is a generator: the query is not executed until the first row
        is requested. The complete result is read with ``fetchall`` before
        the first row is yielded, so the cursor can be closed right away.

        Args:
            query: SQL text of a statement that returns rows.
        """
        cur = self.engine.cursor()
        try:
            cur.execute(query)
            rows = cur.fetchall()
        finally:
            cur.close()
        yield from rows

    def create_user_stats_view(self):
        """Create or replace the ``user_stats`` view.

        One row per ``user_name`` with the time of the first test, that time
        plus 30 and plus 60 days, and the number of tests. Only LAN tests
        with a speed other than 15, 30 or 40 that satisfy
        ``is_classic_resource(file_name)`` are counted.
        """
        self.execute_persistent("""
            create or replace view user_stats as (
            select user_name,
                   min(to_israel_dst_aware(timestamp)) first_test,
                   min(to_israel_dst_aware(timestamp)) + interval '30' day first_test_plus_30_days,
                   min(to_israel_dst_aware(timestamp)) + interval '60' day first_test_plus_60_days,
                   count(*) num_test
            from valid_tests
            where connection = 'LAN'
            and speed not in (15, 30, 40)
            and is_classic_resource(file_name)
            group by user_name
            );
            """)

    def create_randomized_valid_test_table(self):
        """Rebuild ``randomized_valid_tests`` from ``valid_tests``.

        The new table holds every column of ``valid_tests`` plus a
        ``random_index`` drawn with ``random()`` and an ``evening`` flag
        computed by ``is_evening(timestamp)``. ``setseed`` is called with a
        constant right before, so the random order is meant to be
        repeatable between runs. Indexes are added on ``timestamp``,
        ``user_name`` and ``random_index``.
        """
        self.execute_persistent("""
            drop table if exists randomized_valid_tests;
            select setseed(0.314159265359);
            create table if not exists randomized_valid_tests as (
                select random() as random_index, is_evening(timestamp) evening, *
                 from valid_tests
                 order by random_index
            );
            create index on randomized_valid_tests(timestamp);
            create index on randomized_valid_tests(user_name);
            create index on randomized_valid_tests(random_index);
            ;
        """)

    def create_random_sample(self):
        """Rebuild ``test_random_sample`` with up to 300 tests per user.

        For every distinct ``user_name`` in ``valid_tests`` the 300 rows with
        the smallest ``random_index`` are copied from
        ``randomized_valid_tests``, restricted to LAN tests on classic
        resources taken between the user's first test and 30 days later.
        Users whose ``user_stats.num_test`` is below 700 contribute no rows.
        """
        self.execute_persistent("""
            drop table if exists test_random_sample;
            
            create table if not exists test_random_sample(
                id serial primary key,
                user_name text,
                result float,
                speed integer,
                isp text,
                infra text,
                connection text,
                file_name text,
                timestamp bigint
            );
            
            do $$
            declare uname text;
            begin
                for uname in (select distinct user_name from valid_tests order by user_name)
                loop
                   insert into test_random_sample(user_name, result, speed, isp, infra, connection, file_name, timestamp)
                   select randomized_valid_tests.user_name, ground_truth_rate, speed, isp, infrastructure, connection, file_name, timestamp
                   from   randomized_valid_tests
                   join user_stats on randomized_valid_tests.user_name = user_stats.user_name
                   where randomized_valid_tests.user_name = uname
                    and to_israel_dst_aware(timestamp) between first_test and first_test_plus_30_days
                    and connection = 'LAN'
                    and num_test >= 700
                    and is_classic_resource(file_name)
                    order by random_index limit 300;
            end loop;
                end;
            $$;
        """)

    def create_random_sample_evening(self):
        """Rebuild ``test_random_sample_evening`` with up to 300 tests per user.

        Same selection as ``create_random_sample`` with two differences: only
        rows whose ``evening`` flag is true are taken, and the time window
        runs from the user's first test to 60 days later.

        NOTE(review): the 60-day window (versus 30 days in the non-evening
        sample) is presumably there to collect enough evening tests --
        confirm it is intentional.
        """
        self.execute_persistent("""
            drop table if exists test_random_sample_evening;
            
            create table if not exists test_random_sample_evening(
                id serial primary key,
                user_name text,
                result float,
                speed integer,
                isp text,
                infra text,
                connection text,
                file_name text,
                timestamp bigint)
            ;
            
            do $$
            declare uname text;
            begin
                for uname in (select distinct user_name from valid_tests order by user_name)
                loop
            insert into test_random_sample_evening(user_name, result, speed, isp, infra, connection, file_name, timestamp)
               select randomized_valid_tests.user_name, ground_truth_rate, speed, isp, infrastructure, connection, file_name, timestamp
               from   randomized_valid_tests
               join user_stats on randomized_valid_tests.user_name = user_stats.user_name
               where randomized_valid_tests.user_name = uname
                and to_israel_dst_aware(timestamp) between first_test and first_test_plus_60_days
                and connection = 'LAN'
                and num_test >= 700
                and is_classic_resource(file_name)
                and evening is True
                order by random_index limit 300;
            end loop;
                end;
            $$;
        """)
Beispiel #22
0
def speed_test_website_scatter():
    """Plot the mean actual-to-measured speed ratio per speed-test website.

    Three aggregate queries are run against ``valid_tests``: over all
    resources, over rows satisfying ``is_classic_resource(file_name)``, and
    over rows whose ``file_name`` is ``'israel_cache'``. Each query yields
    ``(website name in Hebrew, avg(ground_truth_rate / speed_test_rate))``
    rows, which are drawn as one scatter series. A dotted horizontal line at
    ``y=1`` is added, then the figure is saved as a PNG under the path
    returned by ``create_snapshot_path("website_comparison_scatter")`` and
    shown.

    Side effects: prints progress to stdout, creates directories, writes the
    PNG file and opens the plot window.
    """
    engine = get_engine()

    # NOTE(review): this query groups by website_to_hebrew(website) while the
    # two below group by website; results differ only if several websites map
    # to the same Hebrew name -- confirm which one is intended.
    all_resources_query = """
          select website_to_hebrew(website) "אתר בדיקה",
          avg(ground_truth_rate / speed_test_rate) "יחס מהירות בפועל למהירות בדיקה"
          from valid_tests
          where (ground_truth_rate / speed_test_rate) between 0.01 and 100
          and  true_or_null(is_classic_test)
          and  ground_truth_rate > 0 and speed_test_rate > 0
          and not website = 'atnt'
          group by website_to_hebrew(website)
          order by  "יחס מהירות בפועל למהירות בדיקה"
          ;
      """

    public_servers_query = """
          select website_to_hebrew(website) "אתר בדיקה",
          avg(ground_truth_rate / speed_test_rate) "יחס מהירות בפועל למהירות בדיקה"
          from valid_tests
          where (ground_truth_rate / speed_test_rate) between 0.01 and 100
          and  true_or_null(is_classic_test)
          and  ground_truth_rate > 0 and speed_test_rate > 0
          and is_classic_resource(file_name)
          and not website = 'atnt'
          group by website
          order by  "יחס מהירות בפועל למהירות בדיקה"
          ;
      """

    israel_cache_query = """
    select website_to_hebrew(website) "אתר בדיקה",      
          avg(ground_truth_rate / speed_test_rate) "יחס מהירות בפועל למהירות בדיקה"
          from valid_tests
          where (ground_truth_rate / speed_test_rate) between 0.01 and 100
          and  true_or_null(is_classic_test)
          and  ground_truth_rate > 0 and speed_test_rate > 0
          and file_name = 'israel_cache'
          and not website = 'atnt'
          group by website
          order by  "יחס מהירות בפועל למהירות בדיקה"
      ;
      """

    # Legend label for each query, in the same order as the queries.
    labels = ("כל המקורות", "קבצים ציבוריים", "שרת מטמון ישראל")
    queries = (all_resources_query, public_servers_query, israel_cache_query)

    cur = engine.cursor()
    try:
        for label, query in zip(labels, queries):
            print(f"handeling: {label}")
            cur.execute(query)
            rows = cur.fetchall()
            plot.scatter(x=[normalize_hebrew(row[0]) for row in rows],
                         y=[row[1] for row in rows],
                         label=normalize_hebrew(label))
    finally:
        # Release the cursor even if a query or a plot call fails
        # (assumes a DB-API 2.0 style cursor exposing ``close()``).
        cur.close()

    # Reference line at ratio 1. Its ends are given as two website names;
    # presumably these are the first and last categories on the x axis --
    # TODO confirm against the data.
    first_on_x, last_on_x = normalize_hebrew('הוט'), normalize_hebrew('גוגל')
    plot.hlines(y=1,
                xmin=first_on_x,
                xmax=last_on_x,
                colors='aqua',
                linestyles='dotted',
                lw=2,
                label=normalize_hebrew('חיזוי מדויק'))

    plot.legend(loc="best")
    # NOTE(review): this label reads "ratio of site test to actual speed",
    # whereas the plotted value is ground_truth_rate / speed_test_rate
    # (actual over test) -- confirm the wording.
    plot.ylabel(normalize_hebrew('יחס בדיקת אתר למהירות בפועל'))

    # NOTE(review): this directory is only created here; the figure is saved
    # under the path from create_snapshot_path() below. It looks like
    # copy-paste residue, but the creation is kept so the on-disk side effect
    # stays the same. exist_ok avoids the check-then-create race.
    legacy_dir = Path("question_snapshots") / Path(
        'ground_truth_violin_plots')
    os.makedirs(legacy_dir, exist_ok=True)

    snapshots_path = create_snapshot_path("website_comparison_scatter")

    fig_path = snapshots_path / Path("השוואת ממוצעי אתרי בדיקה" + ".png")
    plot.savefig(fig_path)
    print(f"saving {fig_path}")
    plot.show()