def interpolate_null(geneset_size_enrichments, size_skip, go_size):
    """Interpolate a null mean and sd for a gene-set size.

    Starting at ``go_size``, one candidate size walks downward and another
    walks upward, each moving one step per iteration for as long as it is
    missing from ``geneset_size_enrichments``. At most ``size_skip + 1``
    steps are taken in each direction. The statistics stored for the two
    resulting sizes are then blended, the closer size getting the larger
    weight.

    Parameters:
        geneset_size_enrichments: mapping keyed by gene-set size; the values
            are handed to ``util.mean`` / ``util.sd`` (presumably sequences
            of enrichment scores -- confirm against the caller).
        size_skip: the search takes up to ``size_skip + 1`` steps away from
            ``go_size`` on each side.
        go_size: the gene-set size to interpolate for.

    Returns:
        A ``(mean, sd)`` tuple of the weighted averages.

    The final candidates are looked up without a membership check, so a
    plain ``dict`` raises ``KeyError`` when no usable size was reached.
    """
    # Locate the nearest available sizes on either side of go_size.
    lower = upper = go_size
    for _ in range(size_skip + 1):
        if lower not in geneset_size_enrichments:
            lower -= 1
        if upper not in geneset_size_enrichments:
            upper += 1

    # Interpolation weights: the nearer neighbour weighs more; the "+ 1"
    # keeps the farther neighbour's weight strictly positive.
    gap_below = go_size - lower
    gap_above = upper - go_size
    span = 1 + max(gap_below, gap_above)
    weight_below = float(span - gap_below)
    weight_above = float(span - gap_above)
    total_weight = weight_below + weight_above

    below = geneset_size_enrichments[lower]
    above = geneset_size_enrichments[upper]

    # Weighted average of the two neighbours' mean and standard deviation.
    mean = (weight_below * util.mean(below)
            + weight_above * util.mean(above)) / total_weight
    sd = (weight_below * util.sd(below)
          + weight_above * util.sd(above)) / total_weight
    return mean, sd
def read_trip(data):
    """Compute a dictionary of summary features for one trip.

    The points are obtained from ``read_lines(data)``. All running totals
    are accumulated in a single pass over consecutive point pairs (kept in
    one loop for performance reasons).

    The per-segment distance (rounded to one decimal) is used as the
    instantaneous speed, which presumably relies on the points being
    sampled at a fixed interval -- confirm against the data source.

    Parameters:
        data: whatever ``read_lines`` accepts; it must yield an indexable
            sequence of points whose first two components are x and y.

    Returns:
        dict with the keys
        ``speed0`` .. ``speedK``  -- min-max normalised histogram of the
                                     segment distances,
        ``distance``              -- total distance travelled,
        ``points``                -- number of points in the trip,
        ``mean_inst_speed`` / ``sd_inst_speed``,
        ``mean_avg_speed`` / ``sd_avg_speed``,
        ``mean_acceleration`` / ``sd_acceleration``,
        ``final_angle``           -- bin index of the resultant vector's
                                     angle.

    The trip needs at least two points: shorter trips fail on a division
    by zero or on reducing an empty distance array.
    """
    vectors = read_lines(data)
    point_count = len(vectors)

    distances = []
    average_speeds = []
    accelerations = []
    total_distance = 0.0
    total_average_speed = 0.0
    total_acceleration = 0.0
    resultant_vector = (0, 0)
    previous_distance = None

    for i in range(1, point_count):
        distance = round(edist(vectors[i], vectors[i - 1]), 1)
        distances.append(distance)
        total_distance += distance

        resultant_vector = (resultant_vector[0] + vectors[i][0],
                            resultant_vector[1] + vectors[i][1])

        # Running average speed up to and including this segment.
        average_speed = total_distance / i
        average_speeds.append(average_speed)
        total_average_speed += average_speed

        # Acceleration is the change relative to the PREVIOUS segment.
        # (Comparing against distances[-1] after the append above would
        # compare the value with itself and always give 0.)  The first
        # segment has nothing to compare with, so it contributes 0.
        if previous_distance is None:
            acceleration = 0.0
        else:
            acceleration = distance - previous_distance
        accelerations.append(acceleration)
        total_acceleration += acceleration
        previous_distance = distance

    # Pad with a leading zero so there is one acceleration entry per point
    # (the loop produces one entry per segment).
    accelerations = [0] + accelerations

    # Angle in degrees between the resultant vector and the x-axis, reduced
    # to a bin index. atan2 is used because it takes the signs of both
    # components into account.
    # NOTE(review): atan2 yields angles in [-180, 180] while the bins span
    # [0, 360], so every negative angle lands in bin 0 -- confirm whether
    # the angle should be normalised (e.g. modulo 360) before binning.
    final_angle = np.digitize(
        [degrees(atan2(resultant_vector[1], resultant_vector[0]))],
        np.linspace(0, 360, 13))[0]

    # Means are taken over the number of points, as before.
    mean_inst_speed = total_distance / point_count
    sd_inst_speed = sd(distances, mean_inst_speed)
    mean_average_speed = total_average_speed / point_count
    # Spread of the running average speeds themselves (previously the
    # segment distances were passed here by mistake).
    sd_average_speed = sd(average_speeds, mean_average_speed)
    mean_acceleration = total_acceleration / point_count
    sd_acceleration = sd(accelerations, mean_acceleration)

    # Bin the instantaneous speeds and min-max normalise the bin counts so
    # they can be used as a feature vector.
    speed_array = np.array(distances)
    bins = np.linspace(speed_array.min(), speed_array.max(), 80)
    digitized = np.digitize(speed_array, bins)
    counts = np.bincount(digitized)
    spread = float(counts.max() - counts.min())
    counts = (counts - counts.min()) / spread

    stats = {}
    # Dedicated loop names: reusing the point-count variable here used to
    # overwrite it, so 'points' ended up holding the last bin index.
    for bin_index, frequency in enumerate(counts):
        stats['speed%d' % bin_index] = frequency

    stats['distance'] = total_distance
    stats['points'] = point_count
    stats['mean_inst_speed'] = mean_inst_speed
    stats['sd_inst_speed'] = sd_inst_speed
    stats['mean_avg_speed'] = mean_average_speed
    stats['sd_avg_speed'] = sd_average_speed
    stats['mean_acceleration'] = mean_acceleration
    stats['sd_acceleration'] = sd_acceleration
    stats['final_angle'] = final_angle
    return stats