Example #1
0
def interpolate_null(geneset_size_enrichments, size_skip, go_size):
    """Blend the null statistics of the two known sizes nearest ``go_size``.

    Starting from ``go_size``, the nearest size that is a key of
    ``geneset_size_enrichments`` is searched for in each direction, moving
    at most ``size_skip + 1`` steps. The mean and sd of the two entries
    found are then combined as a weighted average in which the nearer size
    gets the larger weight.

    Args:
        geneset_size_enrichments: mapping from a gene-set size to the values
            handed to ``util.mean`` / ``util.sd`` (presumably null
            enrichment scores -- confirm against the caller).
        size_skip: search radius around ``go_size``.
        go_size: the size to produce statistics for.

    Returns:
        A ``(mean, sd)`` tuple.

    Note:
        If no key is found within the search radius, the final lookup is
        made with a size that is not in the mapping (a ``KeyError`` for a
        plain dict).
    """
    def nearest_known(step):
        # Walk away from go_size until a known size is hit or the search
        # budget is used up.
        size = go_size
        for _ in range(size_skip + 1):
            if size in geneset_size_enrichments:
                break
            size += step
        return size

    lower = nearest_known(-1)
    upper = nearest_known(1)

    # Weight = (largest gap + 1) - own gap, so both weights stay positive
    # and the closer neighbour dominates.
    gap_below = go_size - lower
    gap_above = upper - go_size
    span = 1 + max(gap_below, gap_above)
    weight_below = float(span - gap_below)
    weight_above = float(span - gap_above)
    total_weight = weight_below + weight_above

    mean = (
        weight_below * util.mean(geneset_size_enrichments[lower])
        + weight_above * util.mean(geneset_size_enrichments[upper])
    ) / total_weight
    sd = (
        weight_below * util.sd(geneset_size_enrichments[lower])
        + weight_above * util.sd(geneset_size_enrichments[upper])
    ) / total_weight

    return mean, sd
Example #2
0
def read_trip(data):
    """
    Turn one trip into a dict of summary statistics and speed-histogram features.

    ``data`` is handed to ``read_lines``, which is expected to return an
    indexable sequence of 2-D points (each point is indexed with ``[0]`` and
    ``[1]``). The distance between consecutive points (``edist``, rounded to
    one decimal) is treated as the instantaneous speed of that step.

    Everything is accumulated in a single pass over the points for
    performance reasons.

    Returned keys:
        ``distance``           total distance travelled
        ``points``             number of points in the trip
        ``mean_inst_speed`` / ``sd_inst_speed``
                               statistics of the per-step distances
        ``mean_avg_speed`` / ``sd_avg_speed``
                               statistics of the running average speed
        ``mean_acceleration`` / ``sd_acceleration``
                               statistics of the step-to-step speed change
        ``final_angle``        30-degree sector index of the summed position
                               vector, measured from the x-axis
        ``speed0``, ``speed1``, ...
                               min-max normalised histogram of per-step
                               distances over 80 bin edges

    All means divide by the number of points (not the number of steps),
    as the original implementation did.

    NOTE: a trip with fewer than two points yields no distances and is not
    supported (the division by the point count or the histogram step fails).
    """
    vectors = read_lines(data)
    stats = {}
    distances = []
    average_speeds = []
    accelerations = []

    roundto1 = partial(round, ndigits=1)
    total_distance = 0.0
    resultant_vector = (0, 0)
    total_acceleration = 0.0
    total_average_speed = 0.0
    # Speed of the previous step; the trip is assumed to start from rest.
    previous_distance = 0.0

    for i in range(1, len(vectors)):
        distance = roundto1(edist(vectors[i], vectors[i - 1]))
        distances.append(distance)
        total_distance += distance
        resultant_vector = (resultant_vector[0] + vectors[i][0],
                            resultant_vector[1] + vectors[i][1])
        average_speed = total_distance / i
        average_speeds.append(average_speed)
        total_average_speed += average_speed
        # Compare with the PREVIOUS step. Comparing with distances[-1] after
        # the append above always gave zero.
        acceleration = distance - previous_distance
        previous_distance = distance
        accelerations.append(acceleration)
        total_acceleration += acceleration

    point_count = len(vectors)

    # The origin point has no step leading into it; give it zero acceleration
    # so the list has one entry per point.
    accelerations = [0] + accelerations

    # Angle in degrees between the resultant vector and the x-axis. atan2
    # handles the signs of both components but returns values in
    # [-180, 180]; wrap into [0, 360) so the angle matches the 30-degree
    # sector edges instead of lumping every negative angle into bin 0.
    heading = degrees(atan2(resultant_vector[1], resultant_vector[0])) % 360
    final_angle = np.digitize([heading], np.linspace(0, 360, 13))[0]

    mean_inst_speed = total_distance / point_count
    sd_inst_speed = sd(distances, mean_inst_speed)

    mean_average_speed = total_average_speed / point_count
    # Spread of the running average speeds themselves, not of the raw
    # per-step distances.
    sd_average_speed = sd(average_speeds, mean_average_speed)

    mean_acceleration = total_acceleration / point_count
    sd_acceleration = sd(accelerations, mean_acceleration)

    # Bin and normalize instant speeds frequencies to transform them into
    # a feature vector.
    distances = np.array(distances)
    bins = np.linspace(distances.min(), distances.max(), 80)
    digitized = np.digitize(distances, bins)
    counts = np.bincount(digitized)
    spread = float(counts.max() - counts.min())
    counts = (counts - counts.min()) / spread

    # Use dedicated loop names so the point count is not overwritten.
    for bin_index, frequency in enumerate(counts):
        stats['speed%d' % bin_index] = frequency

    stats['distance'] = total_distance
    stats['points'] = point_count
    stats['mean_inst_speed'] = mean_inst_speed
    stats['sd_inst_speed'] = sd_inst_speed
    stats['mean_avg_speed'] = mean_average_speed
    stats['sd_avg_speed'] = sd_average_speed
    stats['mean_acceleration'] = mean_acceleration
    stats['sd_acceleration'] = sd_acceleration
    stats['final_angle'] = final_angle

    return stats