Ejemplo n.º 1
0
def gen_rule_queries(query_dir, experiment_conf, rule_number, rule):
    '''
    Generate the query files of a single rule

    For FIRST rules the first-rule query is generated; for PREV rules the
    prev, someprev and allprev queries are generated and r<n>.cql is written.
    In every case the files d<n>_pref.cql, d<n>_nonpref.cql and d<n>.cql
    are written into query_dir (<n> is rule_number).
    '''
    def cql_path(stem):
        # Path of the file '<stem>.cql' inside the query directory
        return os.sep.join([query_dir, stem + '.cql'])

    suffix = str(rule_number)
    kind = rule[TYPE]
    if kind == FIRST:
        gen_first_rule_query(query_dir, rule_number)
    elif kind == PREV:
        gen_prev_rule_query(query_dir, rule_number, rule, 1)
        gen_someprev_rule_query(query_dir, rule_number, rule, 2)
        gen_allprev_rule_query(query_dir, rule_number, rule, 3)
        cond_query = COND_QUERY.format(rn=rule_number)
        write_to_txt(cql_path('r' + suffix), cond_query)

    # Input attributes without the sequence identifier (first attribute)
    input_atts = ', '.join(get_attribute_list(experiment_conf[ATT])[1:])
    # Attributes left after also dropping the two TUP (transitive tuples)
    # attributes that follow the identifier
    non_tup_atts = ', '.join(get_attribute_list(experiment_conf[ATT])[3:])

    # D_i query over preferred values
    pref_query = DI_PREF_QUERY.format(att=input_atts,
                                      attnt=non_tup_atts,
                                      cond=rule[COND_SIMPLE],
                                      pref=rule[PREF],
                                      rn=rule_number)
    write_to_txt(cql_path('d' + suffix + '_pref'), pref_query)

    # D_i query over non-preferred values
    nonpref_query = DI_NONPREF_QUERY.format(att=input_atts,
                                            attnt=non_tup_atts,
                                            cond=rule[COND_SIMPLE],
                                            nonpref=rule[NONPREF],
                                            rn=rule_number)
    write_to_txt(cql_path('d' + suffix + '_nonpref'), nonpref_query)

    # D_i query combining both sides
    names = get_attribute_list(experiment_conf[ATT])[1:]
    p_columns = ', '.join('p.' + name for name in names)
    # NOTE(review): these aliased columns use the 'p.' prefix although they
    # fill the np_att placeholder - confirm against the DI_QUERY template
    np_columns = ', '.join('p.' + name + ' AS _' + name for name in names)
    # Equality between p and np on every ceteris attribute
    ceteris = ' AND '.join('p.' + name + ' = np.' + name
                           for name in get_ceteris_attributes(experiment_conf))
    di_query = DI_QUERY.format(p_att=p_columns,
                               np_att=np_columns,
                               rn=rule_number,
                               cet_cond=ceteris)
    write_to_txt(cql_path('d' + suffix), di_query)
Ejemplo n.º 2
0
def gen_conseq_stream(configuration, experiment_conf):
    '''
    Generate the data stream file

    Records are produced by gen_conseq_records for timestamp 0 and then for
    every timestamp up to the maximum data timestamp. The first batch
    is written with write_to_csv and the remaining ones with append_to_csv.
    '''
    # Header: attribute names including the timestamp
    header = get_attribute_list(experiment_conf[ATT], include_timestamp=True)
    # Pair each sequence identifier with a random start in [0, RAN - 1]
    starts = [(seq_id, random.randint(0, experiment_conf[RAN] - 1))
              for seq_id in gen_sequence_id_list(experiment_conf[NSQ])]
    last_ts = get_max_data_timestamp(configuration[PARAMETER])
    out_path = get_data_file(configuration, experiment_conf)
    # Timestamp 0 is always generated, even if last_ts is below 1
    instants = [0]
    instants.extend(range(1, last_ts + 1))
    emit = write_to_csv
    for instant in instants:
        records = gen_conseq_records(configuration, experiment_conf,
                                     starts, instant)
        emit(out_path, header, records)
        # Only the first batch uses write_to_csv
        emit = append_to_csv
Ejemplo n.º 3
0
def gen_cql_queries(configuration, experiment_conf):
    '''
    Generate the CQL queries (original operators) equivalent to the BESTSEQ
    operator

    Besides the transitive tup file, the following files are written in the
    query directory: z.cql, p_join.cql, p.cql, the per-rule queries,
    t1.cql, the transitive queries, id.cql and equiv.cql.
    '''
    gen_transitive_tup(configuration, get_tup_file(configuration))
    query_dir = get_query_dir(configuration, experiment_conf)

    def save(name, text):
        # Store a query text in a file of the query directory
        write_to_txt(os.sep.join([query_dir, name]), text)

    # z query (sequences)
    save('z.cql',
         Z_QUERY.format(ran=experiment_conf[RAN], sli=experiment_conf[SLI]))

    # p_join query (join of z positions), without the sequence identifier
    names = get_attribute_list(experiment_conf[ATT])[1:]
    z1_columns = ', '.join('z1.' + name for name in names)
    z2_columns = ', '.join('z2.' + name + ' AS _' + name for name in names)
    save('p_join.cql',
         P_JOIN_QUERY.format(z1_att=z1_columns, z2_att=z2_columns))

    # p query (positions to be compared): at least one attribute differs
    differs = ' OR '.join('NOT ' + name + ' = _' + name for name in names)
    save('p.cql', P_QUERY.format(p_filter=differs))

    # Per-rule queries (R_i and D_i) and t1 as the union of all d<i>
    selects = []
    rules = get_rule_list(configuration, experiment_conf)
    for number, rule in enumerate(rules, 1):
        gen_rule_queries(query_dir, experiment_conf, number, rule)
        selects.append('SELECT * FROM d' + str(number))
    save('t1.cql', '\nUNION\n'.join(selects) + ';')

    # T_i queries
    gen_cql_transitive_queries(experiment_conf, query_dir)

    # ID query
    save('id.cql', ID_QUERY.format(rn=experiment_conf[LEV]))

    # Final result
    save('equiv.cql', 'SELECT z.* FROM z, id WHERE z.a1 = id.a1;')
Ejemplo n.º 4
0
def gen_cql_w_query(query_dir, experiment_conf):
    '''
    Generate the W relation query (w.cql) from the RANGE and SLIDE values
    '''
    # Comma-separated attribute names
    columns = ', '.join(get_attribute_list(experiment_conf[ATT]))
    w_query = CQL_W.format(att=columns,
                           ran=experiment_conf[RAN],
                           sli=experiment_conf[SLI])
    write_to_txt(os.sep.join([query_dir, 'w.cql']), w_query)
Ejemplo n.º 5
0
def gen_cql_final_query(query_dir, experiment_conf):
    '''
    Generate the final query (equiv.cql) equivalent to the SEQ operator

    The query is the union of one CQL_PI_FINAL query per position, from 1
    up to the range parameter.
    '''
    # Attribute names prefixed with 'w.'
    columns = ', '.join(get_attribute_list(experiment_conf[ATT], prefix='w.'))
    per_position = [CQL_PI_FINAL.format(pos=pos, att=columns)
                    for pos in range(1, experiment_conf[RAN] + 1)]
    union_query = '\nUNION\n'.join(per_position) + ';'
    write_to_txt(os.sep.join([query_dir, 'equiv.cql']), union_query)
Ejemplo n.º 6
0
def gen_cql_final_query(query_dir, experiment_conf):
    '''
    Generate the final query (equiv.cql) equivalent to the ENDSEQ operator

    The query is the union of one CQL_EQUIV query per position, from 1 up to
    the range parameter. Nothing is done when the file already exists.
    '''
    filename = query_dir + os.sep + 'equiv.cql'
    # Keep an already generated file untouched
    if os.path.isfile(filename):
        return
    att_str = ', '.join(get_attribute_list(experiment_conf[ATT]))
    pos_query_list = [CQL_EQUIV.format(att=att_str, ran=position)
                      for position in range(1, experiment_conf[RAN] + 1)]
    query = '\nUNION\n'.join(pos_query_list) + ';'
    # The context manager closes the file even if the write fails
    with open(filename, 'w') as out_file:
        out_file.write(query)
Ejemplo n.º 7
0
def gen_stream(configuration, experiment_conf):
    '''
    Generate the data stream file

    Records are produced by gen_records for timestamp 0 and then for every
    timestamp up to the maximum data timestamp. The first batch is written
    with write_to_csv and the remaining ones with append_to_csv.
    '''
    # Header: attribute names including the timestamp
    header = get_attribute_list(experiment_conf[ATT], include_timestamp=True)
    sequence_ids = gen_sequence_id_list(experiment_conf[NSQ])
    last_ts = get_max_data_timestamp(configuration[PARAMETER])
    out_path = get_data_file(configuration, experiment_conf)
    # Timestamp 0 is always generated, even if last_ts is below 1
    instants = [0]
    instants.extend(range(1, last_ts + 1))
    emit = write_to_csv
    for instant in instants:
        records = gen_records(configuration, experiment_conf, sequence_ids,
                              instant)
        emit(out_path, header, records)
        # Only the first batch uses write_to_csv
        emit = append_to_csv
Ejemplo n.º 8
0
def get_register_stream(configuration, experiment_conf, include_tup=False):
    '''
    Get the register stream string

    The returned text holds the stream registration, optionally followed by
    the tup table registration (include_tup), and ends with a separator
    line of 80 '#' characters.
    '''
    # Every attribute is declared with the INTEGER type
    typed_atts = ', '.join(name + ' ' + INTEGER
                           for name in get_attribute_list(experiment_conf[ATT]))
    data_file = get_data_file(configuration, experiment_conf)
    sections = [REG_STREAM_STR.format(atts=typed_atts, dfile=data_file)]
    if include_tup:
        tup_file = get_tup_file(configuration)
        sections.append(REG_TUP_STR.format(dfile=tup_file))
    separator = '\n\n' + '#' * 80 + '\n\n'
    return '\n\n'.join(sections) + separator
Ejemplo n.º 9
0
def gen_cql_transitive_queries(experiment_conf, query_dir):
    '''
    Generate the CQL queries for transitive comparisons

    One file t<level>.cql is written for each level from 2 up to the LEV
    value of the experiment.
    '''
    # Attribute names without the sequence identifier (first attribute)
    names = get_attribute_list(experiment_conf[ATT])[1:]
    p_columns = ', '.join('p.' + name for name in names)
    np_columns = ', '.join('np._' + name for name in names)
    # Join condition linking the '_' attributes of p to the attributes of np
    join_cond = ' AND '.join('p._' + name + ' = np.' + name for name in names)
    for level in range(2, experiment_conf[LEV] + 1):
        target = os.sep.join([query_dir, 't' + str(level) + '.cql'])
        # Each level refers to the previous one
        level_query = TI_QUERY.format(prev_n=level - 1,
                                      p_att=p_columns,
                                      np_att=np_columns,
                                      p_np_join=join_cond)
        write_to_txt(target, level_query)
Ejemplo n.º 10
0
def gen_cql_queries(configuration, experiment_conf):
    '''
    Generate all the CQL queries equivalent to the CONSEQ operator

    Files written in the query directory: table_ots.cql, stream_ots.cql,
    the z query (through gen_cql_z_query), z_prime.cql, p_start.cql,
    p_end.cql, p_start_end.cql and equiv.cql.
    '''
    query_dir = get_query_dir(configuration, experiment_conf)

    def save(name, text):
        # Store a query text in a file of the query directory
        write_to_txt(os.sep.join([query_dir, name]), text)

    save('table_ots.cql', CQL_TABLE_OTS)
    save('stream_ots.cql', CQL_STREAM_OTS)
    gen_cql_z_query(query_dir, experiment_conf)
    # Queries whose text does not depend on the experiment
    fixed_queries = (
        ('z_prime.cql', CQL_Z_PRIME),
        ('p_start.cql', CQL_P_START),
        ('p_end.cql', CQL_P_END),
        ('p_start_end.cql', CQL_P_START_END),
    )
    for name, text in fixed_queries:
        save(name, text)
    # Final query over the attributes prefixed with 'z.'
    z_columns = ', '.join(get_attribute_list(experiment_conf[ATT], 'z.'))
    save('equiv.cql', CQL_EQUIV.format(zatt=z_columns))