예제 #1
0
    def on_status(self, status):
        """Extract, score and store a single incoming status.

        The status is passed through ``analysis.extract``; the resulting
        record gets a ``"sentiment"`` entry computed from its ``"text"`` and
        is then handed to ``self.db.save``.

        Args:
            status: The status object delivered to this listener.

        Returns:
            Always ``True``. The error path already returned ``True``, so the
            success path now does the same instead of falling through with
            ``None``. Presumably ``True`` tells the streaming client to keep
            listening -- confirm against the listener base class.
        """
        logging.info("writing tweet to couchdb")
        try:
            data = analysis.extract(status)
        except Exception as e:
            # Best-effort ingestion: one unparsable status must not stop the
            # listener, so log it and move on.
            logging.info("exception in parsing tweet: {}".format(e))
            return True

        # The search-based harvester in this code base treats a ``None``
        # result from extract() as "nothing to store"; without this guard the
        # subscription below would raise TypeError.
        if data is None:
            return True

        data["sentiment"] = analysis.sentiment(data["text"])
        # Round-trip through JSON so only plain JSON-compatible types are
        # passed to the database layer.
        self.db.save(json.loads(json.dumps(data)))
        return True
예제 #2
0
    def on_status(self, status):
        """Extract, score and print a single incoming status to ``self.file``.

        The status is passed through ``analysis.extract``; the resulting
        record gets a ``"sentiment"`` entry computed from its ``"text"`` and
        is then printed to ``self.file``.

        Args:
            status: The status object delivered to this listener.

        Returns:
            Always ``True``, on both the success and the skip paths.
            Presumably this tells the streaming client to keep listening --
            confirm against the listener base class.
        """
        logging.info("writing tweet to {}".format(self.file))

        try:
            data = analysis.extract(status)
        except Exception as e:
            # Best-effort ingestion: one unparsable status must not stop the
            # listener, so log it and move on.
            logging.info("exception in parsing tweet: {}".format(e))
            return True

        # The search-based harvester in this code base treats a ``None``
        # result from extract() as "nothing to store"; without this guard the
        # subscription below would raise TypeError.
        if data is None:
            return True

        data["sentiment"] = analysis.sentiment(data["text"])
        print(data, file=self.file)
        return True
예제 #3
0
    # Fragment of a search-based harvester: run `query` against the search
    # API and store every usable result, with a sentiment score, via db.save.
    logging.info("using query {}".format(query))

    # Build an authenticated API client; wait_on_rate_limit=True asks the
    # client to wait rather than fail when the rate limit is reached.
    auth = credentials.authenticate(cred_user)
    api = API(auth, wait_on_rate_limit=True)

    # Iterate over the search results for `query`, requesting MAX_COUNT
    # results per call.
    # NOTE(review): items(5000000) presumably caps the total number of
    # results yielded -- confirm against the Cursor documentation.
    search_results = Cursor(api.search,
                            q=query,
                            count=MAX_COUNT,
                            include_entities=True).items(5000000)

    try:
        for status in search_results:
            logging.info("saving tweet with id {}".format(status.id))

            # A status that extract() cannot parse (KeyError) is logged and
            # skipped so a single bad record does not abort the whole run.
            try:
                data = analysis.extract(status)
            except KeyError as e:
                logging.info("exception in extract: {}".format(e))
                continue

            # extract() may also return None; such statuses are skipped
            # silently.
            if data is None:
                continue

            data["sentiment"] = analysis.sentiment(data["text"])

            # Round-trip through JSON so only plain JSON-compatible types
            # are passed to the database layer.
            db.save(json.loads(json.dumps(data)))
            # print(data)
    except error.TweepError as e:  # Should cover RateLimitException
        # An API error ends the iteration; it is logged, not re-raised, so
        # execution continues after this block.
        logging.error("exception in search_results: {}".format(e))
        logging.info("exiting...")
예제 #4
0
def _analyze(signal, sample_rate):
    """Extract features from ``signal`` and standardise them.

    Features come from ``analysis.extract`` with a 1.0 window and a 0.5
    step. Each row of the extractor output is shifted by its own mean and
    scaled by its own standard deviation, and the result is returned
    transposed relative to that output.
    """
    raw = analysis.extract(signal, sample_rate, window=1.0, step=0.5)
    # Statistics are taken along axis 1, i.e. one mean/std per row of `raw`.
    row_mean = np.mean(raw, axis=1)
    row_std = np.std(raw, axis=1)
    # After transposing, the per-row statistics broadcast across the last
    # axis, so every original row is normalised with its own mean and std.
    centered = raw.T - row_mean
    return centered / row_std
예제 #5
0
def _plot_clips(track, score, startscore, sim_matrix, best_score, best_A,
                best_B):
    """Save a diagnostic figure for ``calc_clips`` to ``_cache(track, '.png')``.

    The figure stacks three panels sharing an x axis: the per-step score
    strip, the per-window start score curve, and the self-similarity matrix.
    The two chosen windows are marked "A" and "B".
    """
    fig = plt.figure(1, figsize=(1024 / 96, 1280 / 96), dpi=96)
    try:
        gs = gridspec.GridSpec(3, 1, height_ratios=[1, 1, 12], hspace=0.1)
        plt.set_cmap('hot')
        axMatrix = plt.subplot(gs[2])
        axMatrix.set_aspect(1.)
        axScore = plt.subplot(gs[0], sharex=axMatrix)
        axStart = plt.subplot(gs[1], sharex=axMatrix)

        # Tile the 1-D score into a strip so it can be shown as an image.
        axScore.matshow(np.tile(score, (36, 1)))
        axScore.axis('off')
        axScore.axvline(best_score)
        axScore.text(best_score, 0, "Max")
        axScore.axvspan(best_A, best_A + 60, color='blue', alpha=0.4)
        axScore.text(best_A, 0, "A")
        axScore.axvspan(best_B, best_B + 60, color='blue', alpha=0.4)
        axScore.text(best_B, 0, "B")

        axStart.plot(startscore)
        axStart.set_ylim([0, 1])
        axStart.get_xaxis().set_visible(False)
        axStart.axvline(best_A)
        axStart.text(best_A, 0, "A")
        axStart.axvline(best_B)
        axStart.text(best_B, 0, "B")

        axMatrix.matshow(sim_matrix)
        axMatrix.axis('off')

        plt.savefig(_cache(track, '.png'), dpi=96, bbox_inches='tight')
    finally:
        # Figure number 1 is reused on every call; unless it is closed, the
        # next call draws on top of this one and the figure is never freed.
        plt.close(fig)


def calc_clips(track, plot=False):
    """Pick two 30-second clips of ``track`` that best represent the whole.

    The audio is analysed in half-second steps. Each step is scored by its
    median cosine similarity to every other step, each 60-step (30-second)
    window is scored by the mean of its step scores, and the best window
    ("A") plus the best window starting at least 60 steps away from it
    ("B") are returned.

    Args:
        track: Object whose ``source`` attribute is read with
            ``audiofile.read``; also passed to ``_cache`` when plotting.
        plot: When true, save a diagnostic PNG via ``_plot_clips``.

    Returns:
        ``((clip_A, feats_A), (clip_B, feats_B))`` where each clip is a
        slice of the normalised signal and each feats entry is the
        matching 60-step slice of the feature array.

    Raises:
        ValueError: If the analysis yields fewer than 60 steps (the window
            score array would have a negative length).
        IndexError: If no window, or no second window at least 60 steps
            from the first, exists.
    """
    # Load audio file and normalize for analysis
    signal = audiofile.read(track.source)
    signal = musictoys.analysis.normalize(signal)
    samplerate = signal.sample_rate
    # Extract some features and normalize them around their means.
    # The extractor output is transposed; everything below treats each row
    # as one analysis step (pdist compares rows, and score/startscore are
    # indexed by step).
    features = analysis.extract(signal, samplerate, window=1.0, step=0.5).T
    # NOTE(review): this binds a second name to the same array, so the
    # in-place normalisation below is visible through orig_features as well
    # (assuming a NumPy array). The returned feats are therefore normalised,
    # not original. Left unchanged because callers may rely on it -- confirm
    # the intent before switching to features.copy().
    orig_features = features
    features -= np.mean(features, axis=0)
    features /= np.std(features, axis=0)
    # Compute self-similarity, normalize to 0..1
    pairwise_dist = scipy.spatial.distance.pdist(features, 'cosine')
    sim_matrix = 1.0 - scipy.spatial.distance.squareform(pairwise_dist)
    # Score each half-second step for overall similarity.
    score = np.median(sim_matrix, axis=0)
    score -= np.min(score)
    score /= np.max(score)
    best_score = np.argmax(score)

    # Score each half-second step for suitability as the start of a 30-second
    # window - what is the average score for each such window?
    startscore = np.zeros(len(score) - 60)
    for i in range(len(startscore)):
        startscore[i] = np.mean(score[i:i + 60])
    # Window starts ranked from best to worst.
    best_starts = np.argsort(startscore)[::-1]
    best_A = best_starts[0]
    # Best-ranked start whose window cannot overlap window A.
    best_B = np.extract(np.abs(best_starts - best_A) >= 60.0, best_starts)[0]

    # Convert step indices (0.5 s each) to sample offsets and cut 30 s.
    clip_A = signal[int(best_A * 0.5 * samplerate):][:int(samplerate * 30)]
    clip_B = signal[int(best_B * 0.5 * samplerate):][:int(samplerate * 30)]
    feats_A = orig_features[best_A:best_A + 60]
    feats_B = orig_features[best_B:best_B + 60]

    if plot:
        # Plot this stuff out so we can see how we're doing.
        _plot_clips(track, score, startscore, sim_matrix, best_score,
                    best_A, best_B)

    return ((clip_A, feats_A), (clip_B, feats_B))
예제 #6
0
def _calc_feats(path):
    """Read the WAV file at ``path`` and return its extracted features.

    The samples and sample rate read by ``scipy.io.wavfile.read`` are passed
    straight to ``analysis.extract``; its result is returned unchanged.
    """
    rate, samples = scipy.io.wavfile.read(path)
    return analysis.extract(samples, rate)