def driver_embeddings(Data, alphabet_size=2, delta=0.1):
    """Run the embedding pipeline over ``Data`` and return its result.

    The first record, ``Data[0]``, acts as the template for every stage: its
    length is the data dimension given to ``s_vals`` and it is the value
    handed to ``shifts_gen.partition_string``.

    Args:
        Data: Indexable collection of records. ``Data[0]`` is read
            unconditionally, so an empty collection fails on that lookup.
        alphabet_size: Forwarded to ``s_vals``.
        delta: Forwarded to ``possible_r_vals``.

    Returns:
        The value produced by ``return_embeddings(Data, random_s_block,
        R_vals, possible_s)``.
    """
    # Bind the template record once instead of re-indexing Data for each stage.
    template = Data[0]
    data_dim = len(template)

    possible_s = s_vals(template, alphabet_size, data_dim)
    R_vals = possible_r_vals(delta, possible_s)
    partitions = shifts_gen.partition_string(template)
    random_s_block = all_random_numbers(possible_s, partitions, R_vals)
    return return_embeddings(Data, random_s_block, R_vals, possible_s)
def s_vals():
    """Return the shift sizes ``ceil(s_def ** j)``, ``j = 0, 1, 2, ...``, that fit the first block.

    Reads the module-level ``Data``, ``data_dim`` and ``alphabet_size``.
    ``s_def`` is ``log2(data_dim * alphabet_size / 2)`` and the first block is
    element 0 of ``shifts_gen.partition_string(Data[0])``.

    Returns:
        list of int: the sizes in generation order, stopping before the first
        size that exceeds ``len`` of the first block.
    """
    x_block = shifts_gen.partition_string(Data[0])[0]
    max_size = len(x_block)
    s_def = math.log(data_dim * alphabet_size / 2, 2)

    if s_def <= 1:
        # A base of at most 1 never grows past 1, so the open-ended loop
        # below would append forever. Only the j = 0 term (always 1) is a
        # meaningful size in that case.
        return [1] if max_size >= 1 else []

    s_val = []
    j = 0
    while True:
        s = int(math.ceil(s_def ** j))
        if s > max_size:
            break
        s_val.append(s)
        j += 1
    return s_val
def s_vals():
    """Collect every shift size ``ceil(s_def ** j)`` that does not exceed the first block's length.

    Uses the module-level ``Data``, ``data_dim`` and ``alphabet_size``; the
    growth base is ``s_def = log2(data_dim * alphabet_size / 2)``. The first
    block is ``shifts_gen.partition_string(Data[0])[0]``.

    Returns:
        list of int: sizes for ``j = 0, 1, 2, ...`` up to, but not including,
        the first one larger than the block length. There is no cap on ``j``.
    """
    x_block = shifts_gen.partition_string(Data[0])[0]
    block_len = len(x_block)
    s_def = math.log(data_dim * alphabet_size / 2, 2)

    # With s_def <= 1 the powers never grow beyond 1, so the uncapped loop
    # would never reach its exit condition on a non-empty block. Return just
    # the j = 0 size (which is always 1) instead of spinning.
    if s_def <= 1:
        return [1] if block_len >= 1 else []

    s_val = []
    j = 0
    s = int(math.ceil(s_def ** j))
    while s <= block_len:
        s_val.append(s)
        j += 1
        s = int(math.ceil(s_def ** j))
    return s_val
def s_vals(data_0, alphabet_size, data_dim):
    """Return at most two shift sizes for the first partition block of ``data_0``.

    The sizes are ``ceil(base ** 0)`` and ``ceil(base ** 1)`` with
    ``base = log2(data_dim * alphabet_size / 2)``. Generation stops early at
    the first size that is larger than the length of the first block returned
    by ``shifts_gen.partition_string(data_0)``.

    Returns:
        list of int: the accepted sizes, in order.
    """
    first_block = shifts_gen.partition_string(data_0)[0]
    base = math.log(data_dim * alphabet_size / 2, 2)
    limit = len(first_block)

    sizes = []
    # Modification to original algorithm: only exponents 0 and 1 are tried.
    for exponent in range(2):
        size = int(math.ceil(base ** exponent))
        if size > limit:
            break
        sizes.append(size)
    return sizes
def s_vals(data_0, alphabet_size, data_dim):
    """Compute the capped list of shift sizes for ``data_0``'s first block.

    ``growth`` is ``log2(data_dim * alphabet_size / 2)``. The candidates are
    ``ceil(growth ** 0)`` followed by ``ceil(growth ** 1)``; a candidate is
    kept only while it does not exceed the length of
    ``shifts_gen.partition_string(data_0)[0]``.

    Returns:
        list of int: zero, one or two sizes.
    """
    block = shifts_gen.partition_string(data_0)[0]
    growth = math.log(data_dim * alphabet_size / 2, 2)

    # Modification to original algorithm: the exponent is capped, so only
    # the powers 0 and 1 are ever considered.
    candidates = (int(math.ceil(growth ** power)) for power in (0, 1))

    accepted = []
    for candidate in candidates:
        if candidate > len(block):
            return accepted
        accepted.append(candidate)
    return accepted
def get_shifts_block(x):
    """Return ``shifts_gen.shifts`` results for every block of ``x`` and every size in ``possible_s``.

    ``x`` is split with ``shifts_gen.partition_string``. The result has one
    inner list per block, and each inner list has one entry per value of the
    module-level ``possible_s``, in iteration order.
    """
    shifts_per_block = []
    for block in shifts_gen.partition_string(x):
        row = []
        for size in possible_s:
            row.append(shifts_gen.shifts(block, size))
        shifts_per_block.append(row)
    return shifts_per_block
# Run configuration. An earlier variant took file_number from sys.argv[2].
delta = 0.1
file_number = 'proteinNew'
alphabet_size = 26
block_s_metric = defaultdict()

# Input records come from the protein file; an earlier variant used
# data_generation.data(data_size, data_dim) instead.
Data = protein_read.read_file_protein('raw_data/multigene_zfill.txt')
data_size = len(Data)
data_dim = len(Data[0])
random_s_block = defaultdict()
final_metric = defaultdict()

# An earlier variant took delta from data_generation.delta.
partitions = shifts_gen.partition_string(Data[0])
num_partitions = len(partitions)


def s_vals():
    """Return the shift sizes ``ceil(base ** k)``, ``k = 0, 1, 2, ...``, that fit the first block.

    Reads the module-level ``Data``, ``data_dim`` and ``alphabet_size``.
    ``base`` is ``log2(data_dim * alphabet_size / 2)``; sizes are collected
    until one exceeds the length of ``shifts_gen.partition_string(Data[0])[0]``.
    """
    first_block = shifts_gen.partition_string(Data[0])[0]
    base = math.log(data_dim * alphabet_size / 2, 2)

    sizes = []
    exponent = 0
    size = int(math.ceil(base ** exponent))
    while size <= len(first_block):
        sizes.append(size)
        exponent += 1
        size = int(math.ceil(base ** exponent))
    return sizes
# Command-line configuration.
data_dim = int(sys.argv[2])
delta = float(sys.argv[3])
data_typos = int(sys.argv[4])
file_number = sys.argv[5]
alphabet_size = 2

# NOTE(review): data_size is read here but not assigned in the visible part of
# the script; presumably it is set earlier (e.g. from sys.argv[1]) - confirm.
Data = data_generation.random_data_generation(data_size, data_dim)
# Alternative input source: data_generation.data_typo(data_dim, k=data_typos)

# The generated data decides the dimensions actually used from here on.
data_dim = len(Data[0])
data_size = len(Data)

block_s_metric = defaultdict()
random_s_block = defaultdict()
final_metric = defaultdict()

partitions = shifts_gen.partition_string(Data[0])
num_partitions = len(partitions)


def s_vals():
    """Return the shift sizes ``ceil(s_def ** j)``, ``j = 0, 1, 2, ...``, that fit the first block.

    Reads the module-level ``Data``, ``data_dim`` and ``alphabet_size``.
    ``s_def`` is ``log2(data_dim * alphabet_size / 2)`` and the first block is
    ``shifts_gen.partition_string(Data[0])[0]``.

    Returns:
        list of int: sizes in generation order, stopping before the first one
        larger than the block length.
    """
    x_block = shifts_gen.partition_string(Data[0])[0]
    max_size = len(x_block)
    s_def = math.log(data_dim * alphabet_size / 2, 2)

    if s_def <= 1:
        # With alphabet_size = 2 this is the data_dim <= 2 case: the powers of
        # s_def never exceed 1, so the uncapped loop below would never exit on
        # a non-empty block. Only the j = 0 size (always 1) is meaningful.
        return [1] if max_size >= 1 else []

    s_val = []
    j = 0
    while True:
        s = int(math.ceil(s_def ** j))
        if s > max_size:
            break
        s_val.append(s)
        j += 1
    return s_val