Example #1
    def __init__(self,
                 hop_size=0.1,
                 top_n=None,
                 labels=None,
                 weights_path=None,
                 **yamnet_kwargs):
        verify_dependencies(['tensorflow'])
        try:
            sys.path.insert(0, str(YAMNET_PATH))
            self.yamnet = attempt_to_import('yamnet')
            verify_dependencies(['yamnet'])
        except MissingDependencyError:
            msg = ('Yamnet could not be imported. To download and set up '
                   'yamnet, run:\n\tpython -m pliers.support.setup_yamnet')
            raise MissingDependencyError(dependencies=None, custom_message=msg)
        if top_n and labels:
            raise ValueError('Top_n and labels are mutually exclusive '
                             'arguments. Reinstantiate the extractor setting '
                             'top_n or labels to None (or leaving it '
                             'unspecified).')

        MODULE_PATH = path.dirname(self.yamnet.__file__)
        LABELS_PATH = path.join(MODULE_PATH, 'yamnet_class_map.csv')
        self.weights_path = weights_path or path.join(MODULE_PATH, 'yamnet.h5')
        self.hop_size = hop_size
        self.yamnet_kwargs = yamnet_kwargs or {}
        self.params = self.yamnet.params
        self.params.PATCH_HOP_SECONDS = hop_size
        for par, v in self.yamnet_kwargs.items():
            setattr(self.params, par, v)
        if self.params.PATCH_WINDOW_SECONDS != 0.96:
            logging.warning(
                'Custom values for PATCH_WINDOW_SECONDS were '
                'passed. YAMNet was trained on windows of 0.96s. Different '
                'values might yield unreliable results.')

        self.top_n = top_n
        all_labels = pd.read_csv(LABELS_PATH)['display_name'].tolist()
        if labels is not None:
            missing = list(set(labels) - set(all_labels))
            labels = list(set(labels) & set(all_labels))
            if missing:
                logging.warning(f'Labels {missing} do not exist. Dropping.')
            self.labels = labels
            self.label_idx = [
                i for i, l in enumerate(all_labels) if l in labels
            ]
        else:
            self.labels = all_labels
            self.label_idx = list(range(len(all_labels)))
        super(AudiosetLabelExtractor, self).__init__()
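A minimal usage sketch for the extractor constructed above (the audio file name is hypothetical; YAMNet must first be installed via python -m pliers.support.setup_yamnet, and the clip is resampled here on the assumption that YAMNet expects 16 kHz input):

from pliers.stimuli import AudioStim
from pliers.filters import AudioResamplingFilter
from pliers.extractors import AudiosetLabelExtractor

# Hypothetical clip; resample to 16 kHz before extraction.
stim = AudioResamplingFilter(target_sr=16000).transform(AudioStim('speech.wav'))

# Keep only the 10 highest-scoring AudioSet classes per analysis window.
ext = AudiosetLabelExtractor(hop_size=0.1, top_n=10)
df = ext.transform(stim).to_df()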
Example #2
import base64
import os
from pliers.transformers import Transformer, BatchTransformerMixin
from pliers.utils import (EnvironmentKeyMixin, attempt_to_import,
                          verify_dependencies)


googleapiclient = attempt_to_import('googleapiclient', fromlist=['discovery'])
oauth_client = attempt_to_import('oauth2client.client', 'oauth_client',
                                 ['GoogleCredentials'])


DISCOVERY_URL = 'https://{api}.googleapis.com/$discovery/rest?version={apiVersion}'


class GoogleAPITransformer(Transformer, EnvironmentKeyMixin):

    _env_keys = 'GOOGLE_APPLICATION_CREDENTIALS'
    _log_attributes = ('handle_annotations',)

    def __init__(self, discovery_file=None, api_version='v1', max_results=100,
                 num_retries=3, handle_annotations='prefix'):
        verify_dependencies(['googleapiclient', 'oauth_client'])
        if discovery_file is None:
            if 'GOOGLE_APPLICATION_CREDENTIALS' not in os.environ:
                raise ValueError("No Google application credentials found. "
                                 "A JSON service account key must be either "
                                 "passed as the discovery_file argument, or "
                                 "set in the GOOGLE_APPLICATION_CREDENTIALS "
                                 "environment variable.")
            discovery_file = os.environ['GOOGLE_APPLICATION_CREDENTIALS']
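A sketch of the two ways credentials can be supplied, following the fallback logic above (the paths are hypothetical; GoogleVisionAPILabelExtractor is one concrete subclass):

import os
from pliers.extractors import GoogleVisionAPILabelExtractor

# Option 1: rely on the environment variable checked above.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '/path/to/service_account.json'
ext = GoogleVisionAPILabelExtractor()

# Option 2: pass the service-account key file explicitly.
ext = GoogleVisionAPILabelExtractor(
    discovery_file='/path/to/service_account.json')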
Example #3
''' Stimuli that are inherently associated with remote resources. '''

import os

from .base import load_stims
from .compound import CompoundStim
from .image import ImageStim
from .text import TextStim
from .video import VideoStim
from pliers.utils import (APIDependent, attempt_to_import, verify_dependencies)

twitter = attempt_to_import('twitter')


class TweetStimFactory(APIDependent):
    '''
    An object from which to generate TweetStims; creates an Api instance from
    the python-twitter library.

    Args:
        consumer_key (str): A valid consumer key for the Twitter API
        consumer_secret (str): A valid consumer secret key for the Twitter API
        access_token_key (str): A valid access token for the Twitter API
        access_token_secret (str): A valid access token secret for the
            Twitter API

    To get these credentials, visit https://dev.twitter.com/.
    '''

    _env_keys = ('TWITTER_CONSUMER_KEY', 'TWITTER_CONSUMER_SECRET',
                 'TWITTER_ACCESS_TOKEN_KEY', 'TWITTER_ACCESS_TOKEN_SECRET')
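A sketch of supplying the four credentials through the environment variables listed in _env_keys (the values and status id are hypothetical, as is the get_status call):

import os
from pliers.stimuli import TweetStimFactory

os.environ['TWITTER_CONSUMER_KEY'] = 'my-consumer-key'
os.environ['TWITTER_CONSUMER_SECRET'] = 'my-consumer-secret'
os.environ['TWITTER_ACCESS_TOKEN_KEY'] = 'my-access-token'
os.environ['TWITTER_ACCESS_TOKEN_SECRET'] = 'my-access-token-secret'

factory = TweetStimFactory()
# tweet = factory.get_status(1234567890)   # hypothetical status id -> TweetStim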
Example #4
    LibrosaFeatureExtractor, STFTAudioExtractor, MeanAmplitudeExtractor,
    SpectralCentroidExtractor, SpectralBandwidthExtractor,
    SpectralContrastExtractor, SpectralRolloffExtractor, PolyFeaturesExtractor,
    ZeroCrossingRateExtractor, ChromaSTFTExtractor, ChromaCQTExtractor,
    ChromaCENSExtractor, MelspectrogramExtractor, MFCCExtractor,
    TonnetzExtractor, TempogramExtractor, RMSExtractor,
    SpectralFlatnessExtractor, OnsetDetectExtractor,
    OnsetStrengthMultiExtractor, TempoExtractor, BeatTrackExtractor,
    HarmonicExtractor, PercussiveExtractor, AudiosetLabelExtractor)
from pliers.stimuli import (ComplexTextStim, AudioStim,
                            TranscribedAudioCompoundStim)
from pliers.filters import AudioResamplingFilter
from pliers.utils import attempt_to_import, verify_dependencies

AUDIO_DIR = join(get_test_data_path(), 'audio')
tf = attempt_to_import('tensorflow')


def test_stft_extractor():
    stim = AudioStim(join(AUDIO_DIR, 'barber.wav'), onset=4.2)
    ext = STFTAudioExtractor(frame_size=1.,
                             spectrogram=False,
                             freq_bins=[(100, 300), (300, 3000),
                                        (3000, 20000)])
    result = ext.transform(stim)
    df = result.to_df()
    assert df.shape == (557, 7)
    assert df['onset'][0] == 4.2

    ext = STFTAudioExtractor(frame_size=1., spectrogram=False, freq_bins=5)
    result = ext.transform(stim)
Example #5
from pliers.extractors import Extractor, merge_results
from pliers.transformers import get_transformer
from pliers.utils import attempt_to_import
from six import string_types

sklearn = attempt_to_import('sklearn')
if sklearn:
    class SklearnBase(sklearn.base.TransformerMixin, sklearn.base.BaseEstimator):
        pass
else:
    class SklearnBase():
        pass


class PliersTransformer(SklearnBase):

    ''' Simple wrapper for using pliers within a sklearn workflow.
    Args:
        transformer (Graph or Transformer): Pliers object to execute. Can
            either be a Graph with several transformers chained or a single
            transformer.
    '''

    def __init__(self, transformer):
        if isinstance(transformer, string_types):
            self.transformer = get_transformer(transformer)
        else:
            self.transformer = transformer

    def fit(self, X, y=None):
        return self
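A sketch of dropping the wrapper into a scikit-learn Pipeline (the extractor name comes from pliers; the file names and labels are hypothetical):

from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

pipe = Pipeline([
    ('features', PliersTransformer('RMSExtractor')),   # class defined above
    ('clf', SVC()),
])
# pipe.fit(['clip1.wav', 'clip2.wav'], [0, 1])   # hypothetical stimuli and labels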
Example #6
'''
Extractors that operate primarily or exclusively on Image stimuli.
'''

from functools import partial

import numpy as np
import pandas as pd

from pliers.stimuli.image import ImageStim
from pliers.extractors.base import Extractor, ExtractorResult
from pliers.utils import attempt_to_import, verify_dependencies, listify
from pliers.support.due import due, Url, Doi

cv2 = attempt_to_import('cv2')
face_recognition = attempt_to_import('face_recognition')


class ImageExtractor(Extractor):
    ''' Base Image Extractor class; all subclasses can only be applied to
    images. '''
    _input_type = ImageStim


class BrightnessExtractor(ImageExtractor):
    ''' Gets the average luminosity of the pixels in the image '''

    VERSION = '1.0'

    def _extract(self, stim):
        data = stim.data
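A minimal usage sketch for the extractor above (the image path is hypothetical):

from pliers.stimuli import ImageStim
from pliers.extractors import BrightnessExtractor

img = ImageStim('apple.jpg')            # hypothetical image file
result = BrightnessExtractor().transform(img)
print(result.to_df())                   # one row holding the mean luminosity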
Example #7
''' The `graph` module contains tools for constructing and executing graphs
of pliers Transformers. '''

from pliers.extractors.base import merge_results
from pliers.stimuli import __all__ as stim_list
from pliers.transformers import get_transformer
from pliers.utils import (listify, flatten, isgenerator, attempt_to_import,
                          verify_dependencies)
from itertools import chain
from six import string_types
from collections import OrderedDict

import json

pgv = attempt_to_import('pygraphviz', 'pgv')
stim_list.insert(0, 'ExtractorResult')


class Node(object):

    ''' A graph node/vertex. Represents a single transformer, optionally with
    references to children.

    Args:
        name (str): Name of the node
        transformer (Transformer): the Transformer instance at this node
        parameters (kwargs): parameters for initializing the Transformer
    '''

    def __init__(self, transformer, name=None, **parameters):
        self.name = name
Example #8
''' The `graph` module contains tools for constructing and executing graphs
of pliers Transformers. '''

from pliers.extractors.base import merge_results
from pliers.stimuli import __all__ as stim_list
from pliers.transformers import get_transformer
from pliers.utils import (listify, flatten, isgenerator, attempt_to_import,
                          verify_dependencies)
from itertools import chain
from six import string_types
from collections import OrderedDict

import json

pgv = attempt_to_import('pygraphviz', 'pgv')
stim_list.insert(0, 'ExtractorResult')


class Node(object):
    ''' A graph node/vertex. Represents a single transformer, optionally with
    references to children.

    Args:
        name (str): Name of the node
        transformer (Transformer): the Transformer instance at this node
        parameters (kwargs): parameters for initializing the Transformer
    '''
    def __init__(self, transformer, name=None, **parameters):
        self.name = name
        self.children = []
        if isinstance(transformer, string_types):
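A small sketch of how nodes like the one above are assembled into a Graph and executed (the stimulus file name is hypothetical):

from pliers.graph import Graph
from pliers.stimuli import ImageStim

# Each node name string is resolved to a Transformer internally
# (via the get_transformer helper imported above).
g = Graph()
g.add_nodes(['BrightnessExtractor', 'VibranceExtractor'])
df = g.run([ImageStim('picture.jpg')])   # merged results as a DataFrame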
Example #9
''' Stimuli that are inherently associated with remote resources. '''

import logging
import os

from .base import load_stims
from .compound import CompoundStim
from .image import ImageStim
from .text import TextStim
from .video import VideoStim
from pliers.utils import (APIDependent, attempt_to_import,
                          verify_dependencies)

twitter = attempt_to_import('twitter')


class TweetStimFactory(APIDependent):

    '''
    An object from which to generate TweetStims; creates an Api instance from
    the python-twitter library.

    Args:
        consumer_key (str): A valid consumer key for the Twitter API
        consumer_secret (str): A valid consumer secret key for the Twitter API
        access_token_key (str): A valid access token for the Twitter API
        access_token_secret (str): A valid access token secret for the
            Twitter API

    To get these credentials, visit https://dev.twitter.com/.
    '''
Example #10
''' Rev.ai API-based Converter classes '''

import logging
import os
import time

from pliers.stimuli.text import TextStim, ComplexTextStim
from pliers.utils import attempt_to_import, verify_dependencies
from pliers.converters.audio import AudioToTextConverter
from pliers.transformers.api import APITransformer

rev_ai = attempt_to_import('rev_ai')
rev_ai_client = attempt_to_import('rev_ai.apiclient', 'rev_ai_client',
                                  ['RevAiAPIClient'])


class RevAISpeechAPIConverter(APITransformer, AudioToTextConverter):
    ''' Uses the Rev AI speech-to-text API to transcribe an audio file.

    Args:
        access_token (str): API credential access token. Must be passed
            explicitly or stored in the environment variable specified
            in the _env_keys field.
        timeout (int): Number of seconds to wait for audio transcription
            to finish. Defaults to 90 seconds.
        request_rate (int): Number of seconds to wait between polling the
            API for completion.
    '''

    _env_keys = ('REVAI_ACCESS_TOKEN', )
    _log_attributes = ('access_token', 'timeout', 'request_rate')
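A usage sketch for the converter (the token and file name are hypothetical; the token can also be passed directly as access_token):

import os
from pliers.stimuli import AudioStim
from pliers.converters import RevAISpeechAPIConverter

os.environ['REVAI_ACCESS_TOKEN'] = 'my-rev-ai-token'     # hypothetical token

conv = RevAISpeechAPIConverter(timeout=120, request_rate=10)
transcript = conv.transform(AudioStim('interview.wav'))  # ComplexTextStim
for word in transcript.elements:
    print(word.text, word.onset)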
Example #11
''' Extractor classes based on pre-trained models. '''

import numpy as np
from PIL import Image
from pliers.extractors.image import ImageExtractor
from pliers.extractors.base import ExtractorResult
from pliers.utils import attempt_to_import, verify_dependencies


tf = attempt_to_import('tensorflow')


def _resize_image(image, shape):
    return np.array(
        Image.fromarray(image).resize(shape, resample=Image.BICUBIC))


class TensorFlowKerasInceptionV3Extractor(ImageExtractor):

    ''' Labels objects in images using a pretrained Inception V3 architecture
    implemented in TensorFlow / Keras.

    Images must be RGB and have shape (299, 299). Images will be resized (with
    some distortion) if the shape is different.

    Args:
        weights (str): URL to download pre-trained weights. If None (default),
            uses the pre-trained Inception V3 model (dated 2017-03-10) used in
            Keras Applications.
        num_predictions (int): Number of top predicted labels to retain for
            each image.
Example #12
                               GoogleLanguageAPISentimentExtractor,
                               GoogleLanguageAPISyntaxExtractor,
                               GoogleLanguageAPITextCategoryExtractor,
                               GoogleLanguageAPIEntitySentimentExtractor,
                               ExtractorResult,
                               merge_results)
from pliers.extractors.api.google import GoogleVisionAPIExtractor
from pliers.stimuli import ImageStim, VideoStim, TextStim
from pliers.utils import attempt_to_import, verify_dependencies
import pytest
import json
from os.path import join
from ...utils import get_test_data_path
import numpy as np

googleapiclient = attempt_to_import('googleapiclient', fromlist=['discovery'])

IMAGE_DIR = join(get_test_data_path(), 'image')
VIDEO_DIR = join(get_test_data_path(), 'video')
TEXT_DIR = join(get_test_data_path(), 'text')


@pytest.mark.requires_payment
@pytest.mark.skipif("'GOOGLE_APPLICATION_CREDENTIALS' not in os.environ")
def test_google_vision_api_extractor_inits():
    ext = GoogleVisionAPIExtractor(num_retries=5)
    assert ext.num_retries == 5
    assert ext.max_results == 100
    assert ext.service is not None

Example #13
''' Extractor classes based on pre-trained models. '''

import numpy as np
import pandas as pd

from pliers.extractors.image import ImageExtractor
from pliers.extractors.base import Extractor, ExtractorResult
from pliers.filters.image import ImageResizingFilter
from pliers.stimuli import ImageStim, TextStim
from pliers.stimuli.base import Stim
from pliers.support.exceptions import MissingDependencyError
from pliers.utils import (attempt_to_import, verify_dependencies, listify)

import logging

tf = attempt_to_import('tensorflow')
hub = attempt_to_import('tensorflow_hub')
attempt_to_import('tensorflow.keras')
attempt_to_import('tensorflow_text')


class TFHubExtractor(Extractor):
    ''' A generic class for Tensorflow Hub extractors 
    Args:
        url_or_path (str): url or path to TFHub model. You can
            browse models at https://tfhub.dev/.
        features (optional): list of labels (for classification) 
            or other feature names. The number of items must 
            match the number of features in the output. For example,
            if a classification model with 1000 output classes is passed 
            (e.g. EfficientNet B6, 
Example #14
''' Wit.ai API-based Converters '''

import logging
import os
from abc import abstractproperty
from pliers.stimuli.text import ComplexTextStim
from pliers.utils import attempt_to_import, verify_dependencies
from pliers.converters.audio import AudioToTextConverter
from pliers.transformers.api import APITransformer
from six.moves.urllib.request import Request, urlopen
from six.moves.urllib.error import HTTPError

sr = attempt_to_import('speech_recognition', 'sr')


class SpeechRecognitionAPIConverter(APITransformer, AudioToTextConverter):
    ''' Uses the SpeechRecognition library, which interacts with several APIs,
    like Google and Wit, to run speech-to-text transcription on an audio file.

    Args:
        api_key (str): API key. Must be passed explicitly or stored in
            the environment variable specified in the _env_keys field.
        rate_limit (int): The minimum number of seconds required between
            transform calls on this Transformer.
    '''

    _log_attributes = ('api_key', 'recognize_method')
    VERSION = '1.0'

    @abstractproperty
    def recognize_method(self):
Example #15
''' Classes that represent text or sequences of text. '''

import re
import pandas as pd
from six import string_types
from six.moves.urllib.request import urlopen
from pliers.support.decorators import requires_nltk_corpus
from pliers.utils import attempt_to_import, verify_dependencies
from .base import Stim

pysrt = attempt_to_import('pysrt')


class TextStim(Stim):

    ''' Any simple text stimulus--most commonly a single word.

    Args:
        filename (str): Path to input file, if one exists.
        text (str): Text value to store. If none is provided, value is read
            from filename.
        onset (float): Optional onset of the text presentation (in secs) with
            respect to some more general context or timeline the user wishes
            to keep track of.
        duration (float): Optional duration of the TextStim, in seconds.
        order (int): Optional sequential index of the TextStim within some
            broader context.
        url (str): Optional url to read contents from.
    '''

    _default_file_extension = '.txt'
Example #16
''' Extractor classes based on pre-trained models. '''

import numpy as np
from pliers.extractors.image import ImageExtractor
from pliers.extractors.base import ExtractorResult
from pliers.filters.image import ImageResizingFilter
from pliers.utils import attempt_to_import, verify_dependencies

tf = attempt_to_import('tensorflow')
attempt_to_import('tensorflow.keras')


class TensorFlowKerasApplicationExtractor(ImageExtractor):
    ''' Labels objects in images using a pretrained Inception V3 architecture
    implemented in TensorFlow / Keras.

    Images must be RGB and be a certain shape. Different model architectures
    may require different shapes, and images will be resized (with some
    distortion) if the shape of the image is different.

    Args:
        architecture (str): model architecture to use. One of 'vgg16', 'vgg19',
            'resnet50', 'inception_resnetv2', 'inceptionv3', 'xception',
            'densenet121', 'densenet169', 'nasnetlarge', or 'nasnetmobile'.
        weights (str): URL to download pre-trained weights. If None (default),
            uses the pre-trained weights trained on ImageNet used in Keras
            Applications.
        num_predictions (int): Number of top predicted labels to retain for
            each image.
    '''
Example #17
'''
Extractors that operate primarily or exclusively on Video stimuli.
'''

from pliers.stimuli.video import VideoStim
from pliers.extractors.base import Extractor, ExtractorResult
from pliers.utils import attempt_to_import, verify_dependencies

import numpy as np

cv2 = attempt_to_import('cv2')


class VideoExtractor(Extractor):

    ''' Base Video Extractor class; all subclasses can only be applied to
    video. '''
    _input_type = VideoStim


class FarnebackOpticalFlowExtractor(VideoExtractor):

    ''' Extracts total amount of dense optical flow between every pair of video
    frames.

    Args:
        pyr_scale (float): the image scale (<1) used to build pyramids for
            each image; pyr_scale=0.5 means a classical pyramid, where each
            next layer is half the size of the previous one.
        levels (int): number of pyramid layers including the initial image;
            levels=1 means that no extra layers are created and only the
Example #18
#import tensorflow_hub as hub
import logging
from enum import Enum

from pliers.utils import attempt_to_import

# bert-related helper code
from pliers.extractors import bert_modeling
from pliers.extractors import bert_tokenization
from pliers.extractors import bert_extract_features
# skipthought-related helper code
from pliers.extractors import skipthoughts
# SIF-related helper code
from pliers.extractors import sif_data_io, sif_params, SIF_embedding
# elmo-related code from AllenAI
from allennlp.commands.elmo import ElmoEmbedder

embedding_methods = Enum('embedding_methods',
                         'average_embedding word2vec glove')

keyedvectors = attempt_to_import('gensim.models.keyedvectors', 'keyedvectors',
                                 ['KeyedVectors'])
doc2vecVectors = attempt_to_import('gensim.models.doc2vec', 'doc2vecVectors',
                                   ['Doc2Vec.load'])

logging.getLogger('smart_open').setLevel(logging.ERROR)
logger = logging.getLogger("text_encoding_logger")


class DirectTextExtractorInterface():
    '''
    Args:
        method (str): The name of the embedding method to use. The available
            options (averageembedding, doc2vec, sif, ...) are documented in
            the README. (default: averageembedding)
        
Example #19
''' Filters that operate on VideoStim inputs. '''

import numpy as np

from pliers.stimuli.video import VideoStim, VideoFrameCollectionStim
from pliers.utils import attempt_to_import, verify_dependencies
from .base import Filter, TemporalTrimmingFilter

cv2 = attempt_to_import('cv2')


class VideoFilter(Filter):
    ''' Base class for all VideoFilters. '''

    _input_type = VideoStim


class FrameSamplingFilter(Filter):
    ''' Samples frames from video stimuli, to improve efficiency.

    Args:
        every (int): takes every nth frame
        hertz (int): takes n frames per second
        top_n (int): takes top n frames sorted by the absolute difference
         with the next frame
    '''

    _input_type = VideoFrameCollectionStim
    _log_attributes = ('every', 'hertz', 'top_n')
    VERSION = '1.0'
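A usage sketch for the filter (the video file name is hypothetical; every, hertz, and top_n are alternative sampling strategies):

from pliers.stimuli import VideoStim
from pliers.filters import FrameSamplingFilter

video = VideoStim('commercial.mp4')      # hypothetical clip

# Retain one frame per second instead of every frame.
sampled = FrameSamplingFilter(hertz=1).transform(video)

# Alternative: keep every 30th frame.
# sampled = FrameSamplingFilter(every=30).transform(video)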
Example #20
''' Rev.ai API-based Converter classes '''

import logging
import os
import time

from pliers.stimuli.text import TextStim, ComplexTextStim
from pliers.utils import attempt_to_import, verify_dependencies
from pliers.converters.audio import AudioToTextConverter
from pliers.transformers.api import APITransformer

rev_ai = attempt_to_import('rev_ai')
rev_ai_client = attempt_to_import('rev_ai.apiclient',
                                  'rev_ai_client',
                                  ['RevAiAPIClient'])


class RevAISpeechAPIConverter(APITransformer, AudioToTextConverter):

    ''' Uses the Rev AI speech-to-text API to transcribe an audio file.

    Args:
        access_token (str): API credential access token. Must be passed
            explicitly or stored in the environment variable specified
            in the _env_keys field.
        timeout (int): Number of seconds to wait for audio transcription
            to finish. Defaults to 90 seconds.
        request_rate (int): Number of seconds to wait between polling the
            API for completion.
    '''
Example #21
''' Extractors that operate on AudioStim inputs. '''
from pliers.stimuli.audio import AudioStim
from pliers.stimuli.text import ComplexTextStim
from pliers.extractors.base import Extractor, ExtractorResult
from pliers.utils import attempt_to_import, verify_dependencies, listify
import numpy as np
from scipy import fft
from abc import ABCMeta

librosa = attempt_to_import('librosa')


class AudioExtractor(Extractor):

    ''' Base Audio Extractor class; all subclasses can only be applied to
    audio. '''
    _input_type = AudioStim


class STFTAudioExtractor(AudioExtractor):

    ''' Short-time Fourier Transform extractor.

    Args:
        frame_size (float): The width of the frame/window to apply an FFT to,
            in seconds.
        hop_size (float): The step size to increment the window by on each
            iteration, in seconds (effectively, the sampling rate).
        freq_bins (list or int): The set of bins or frequency bands to extract
            power for. If an int is passed, this is the number of bins
            returned, with each bin spanning an equal range of frequencies.
Example #22
'''
Extractors that operate primarily or exclusively on Image stimuli.
'''

from pliers.stimuli.image import ImageStim
from pliers.extractors.base import Extractor, ExtractorResult
from pliers.utils import attempt_to_import, verify_dependencies, listify
from pliers.support.due import due, Url, Doi
import numpy as np
import pandas as pd
from functools import partial


cv2 = attempt_to_import('cv2')
face_recognition = attempt_to_import('face_recognition')


class ImageExtractor(Extractor):

    ''' Base Image Extractor class; all subclasses can only be applied to
    images. '''
    _input_type = ImageStim


class BrightnessExtractor(ImageExtractor):

    ''' Gets the average luminosity of the pixels in the image '''

    VERSION = '1.0'

    def _extract(self, stim):
Example #23
import os
try:
    from contextlib import ExitStack
except ImportError:
    from contextlib2 import ExitStack
from pliers.extractors.image import ImageExtractor
from pliers.extractors.base import ExtractorResult
from pliers.transformers import BatchTransformerMixin
from pliers.transformers.api import APITransformer
from pliers.utils import listify, attempt_to_import, verify_dependencies
import pandas as pd

clarifai_client = attempt_to_import(
    'clarifai.rest.client', 'clarifai_client', [
        'ClarifaiApp', 'Concept', 'ModelOutputConfig', 'ModelOutputInfo',
        'Image'
    ])


class ClarifaiAPIExtractor(APITransformer, BatchTransformerMixin,
                           ImageExtractor):
    ''' Uses the Clarifai API to extract tags of images.

    Args:
        api_key (str): A valid API_KEY for the Clarifai API. Only needs to be
            passed the first time the extractor is initialized.
        model (str): The name of the Clarifai model to use. If None, defaults
            to the general image tagger.
        min_value (float): A value between 0.0 and 1.0 indicating the minimum
            confidence required to return a prediction. Defaults to 0.0.
Example #24
import logging
import os
from pliers.transformers import BatchTransformerMixin
from pliers.transformers.api import APITransformer
from pliers.utils import attempt_to_import, verify_dependencies


googleapiclient = attempt_to_import('googleapiclient', fromlist=['discovery'])
google_auth = attempt_to_import('google.oauth2', 'google_auth',
                                fromlist=['service_account'])


DISCOVERY_URL = 'https://{api}.googleapis.com/$discovery/rest?version={apiVersion}'


class GoogleAPITransformer(APITransformer):
    ''' Base GoogleAPITransformer class.

    Args:
      discovery_file (str): path to discovery file containing Google
        application credentials.
      api_version (str): API version to use.
      max_results (int): Max number of results per page.
      num_retries (int): Number of times to retry query on failure.
      rate_limit (int): The minimum number of seconds required between
            transform calls on this Transformer.
    '''

    _env_keys = 'GOOGLE_APPLICATION_CREDENTIALS'
    _log_attributes = ('discovery_file', 'api_version')
Example #25
''' Core transformer logic. '''

from pliers import config
from pliers.stimuli.base import Stim, _log_transformation, load_stims
from pliers.stimuli.compound import CompoundStim
from pliers.utils import (progress_bar_wrapper, isiterable, isgenerator,
                          listify, batch_iterable, attempt_to_import)
import pliers
from six import with_metaclass, string_types
from abc import ABCMeta, abstractmethod, abstractproperty
import importlib
import logging

multiprocessing = attempt_to_import('pathos.multiprocessing',
                                    'multiprocessing', ['ProcessingPool'])

_cache = {}


class Transformer(with_metaclass(ABCMeta)):

    _log_attributes = ()
    _loggable = True
    VERSION = '0.1'

    # Stim types that *can* be passed as input, but aren't mandatory. This
    # allows for disjunctive specification; e.g., if _input_type is empty
    # and _optional_input_type is (AudioStim, TextStim), then _at least_ one
    # of the two must be passed. If both are specified in _input_type, then
    # the input would have to be a CompoundStim with both audio and text slots.
    _optional_input_type = ()
Example #26
try:
    from contextlib import ExitStack
except ImportError:
    from contextlib2 import ExitStack
from pliers.extractors.image import ImageExtractor
from pliers.extractors.video import VideoExtractor
from pliers.extractors.base import ExtractorResult
from pliers.transformers import BatchTransformerMixin
from pliers.transformers.api import APITransformer
from pliers.utils import listify, attempt_to_import, verify_dependencies
import pandas as pd

clarifai_client = attempt_to_import('clarifai.rest.client', 'clarifai_client',
                                    ['ClarifaiApp',
                                     'Concept',
                                     'ModelOutputConfig',
                                     'ModelOutputInfo',
                                     'Image',
                                     'Video'])


class ClarifaiAPIExtractor(APITransformer):

    ''' Uses the Clarifai API to extract tags of visual stimuli.

    Args:
        api_key (str): A valid API_KEY for the Clarifai API. Only needs to be
            passed the first time the extractor is initialized.
        model (str): The name of the Clarifai model to use. If None, defaults
            to the general image tagger.
        min_value (float): A value between 0.0 and 1.0 indicating the minimum
Example #27
'''
Extractors that interact with the AWS Rekognition API.
'''

from pliers.stimuli.image import ImageStim
from pliers.extractors.base import Extractor, ExtractorResult
from pliers.utils import attempt_to_import, verify_dependencies
import pandas as pd

boto3 = attempt_to_import('boto3')


class AwsRekognitionExtractor(Extractor):

    ''' Extracts information from images via the AWS Rekognition API. '''

    _input_type = ImageStim

    def __init__(self, profile_name=None, region_name=None, extractor_type=None):
        verify_dependencies(['boto3'])
        if profile_name is not None and region_name is not None:
            self.session = boto3.Session(profile_name=profile_name)
            # Create the client from the session instance, not the class.
            self.rekognition = self.session.client(
                'rekognition', region_name=region_name)

        elif profile_name is not None:
            self.session = boto3.Session(profile_name=profile_name)
            self.rekognition = self.session.client('rekognition')
        else:
            self.rekognition = boto3.client('rekognition')

        self.extractor_type = extractor_type

        super(AwsRekognitionExtractor, self).__init__()
Example #28
''' Converter classes that operate on ImageStim inputs. '''

from PIL import Image
from .base import Converter
from pliers.stimuli.image import ImageStim
from pliers.stimuli.text import TextStim
from pliers.utils import attempt_to_import, verify_dependencies

pytesseract = attempt_to_import('pytesseract')


class ImageToTextConverter(Converter):
    ''' Base ImageToText Converter class; all subclasses can only be applied to
    images and convert them to text. '''
    _input_type = ImageStim
    _output_type = TextStim


class TesseractConverter(ImageToTextConverter):
    ''' Uses the Tesseract library to extract text from images. '''

    VERSION = '1.0'

    def _convert(self, stim):
        verify_dependencies(['pytesseract'])
        text = pytesseract.image_to_string(Image.fromarray(stim.data))
        return TextStim(text=text, onset=stim.onset, duration=stim.duration)
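A usage sketch for the converter (the image path is hypothetical; the tesseract binary and pytesseract must both be installed):

from pliers.stimuli import ImageStim
from pliers.converters import TesseractConverter

page = ImageStim('scanned_page.png')     # hypothetical scanned image
text_stim = TesseractConverter().transform(page)
print(text_stim.text)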
Example #29
from pliers.stimuli.text import TextStim, ComplexTextStim
from pliers.extractors.base import Extractor, ExtractorResult
from pliers.support.exceptions import PliersError
from pliers.support.decorators import requires_nltk_corpus
from pliers.datasets.text import fetch_dictionary
from pliers.transformers import BatchTransformerMixin
from pliers.utils import attempt_to_import, verify_dependencies
import numpy as np
import pandas as pd
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import sys
from six import string_types

keyedvectors = attempt_to_import('gensim.models.keyedvectors', 'keyedvectors',
                                 ['KeyedVectors'])
sklearn_text = attempt_to_import('sklearn.feature_extraction.text',
                                 'sklearn_text',
                                 ['VectorizerMixin', 'CountVectorizer'])


class TextExtractor(Extractor):
    ''' Base Text Extractor class; all subclasses can only be applied to text.
    '''
    _input_type = TextStim


class ComplexTextExtractor(Extractor):
    ''' Base ComplexTextStim Extractor class; all subclasses can only be
    applied to ComplexTextStim instances.
    '''
Example #30
import pandas as pd
import numpy as np
from pliers.utils import attempt_to_import, verify_dependencies
import matplotlib.pyplot as plt
from scipy.spatial.distance import mahalanobis
from numpy.linalg import LinAlgError


sns = attempt_to_import('seaborn')


def correlation_matrix(df):
    '''
    Returns a pandas DataFrame with the pair-wise correlations of the columns.

    Args:
        df: pandas DataFrame with columns to run diagnostics on
    '''
    columns = df.columns.tolist()
    corr = pd.DataFrame(
        np.corrcoef(df, rowvar=0), columns=columns, index=columns)
    return corr


def eigenvalues(df):
    '''
    Returns a pandas Series with eigenvalues of the correlation matrix.

    Args:
        df: pandas DataFrame with columns to run diagnostics on
    '''
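A quick check of correlation_matrix on synthetic data (the column names are hypothetical):

import numpy as np
import pandas as pd

rng = np.random.RandomState(0)
x = rng.normal(size=200)
df = pd.DataFrame({'x': x,
                   'noisy_x': x + rng.normal(scale=0.5, size=200),
                   'independent': rng.normal(size=200)})

corr = correlation_matrix(df)   # function defined above
print(corr.round(2))            # 'x' and 'noisy_x' correlate strongly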
Example #31
import numpy as np
import pandas as pd
import scipy
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

from pliers.stimuli.text import TextStim, ComplexTextStim
from pliers.extractors.base import Extractor, ExtractorResult
from pliers.support.exceptions import PliersError
from pliers.support.decorators import requires_nltk_corpus
from pliers.datasets.text import fetch_dictionary
from pliers.transformers import BatchTransformerMixin
from pliers.utils import (attempt_to_import, verify_dependencies, flatten,
                          listify)

keyedvectors = attempt_to_import('gensim.models.keyedvectors', 'keyedvectors',
                                 ['KeyedVectors'])
sklearn_text = attempt_to_import('sklearn.feature_extraction.text',
                                 'sklearn_text', ['CountVectorizer'])
spacy = attempt_to_import('spacy')
transformers = attempt_to_import('transformers')


class TextExtractor(Extractor):
    ''' Base Text Extractor class; all subclasses can only be applied to text.
    '''
    _input_type = TextStim


class ComplexTextExtractor(Extractor):
    ''' Base ComplexTextStim Extractor class; all subclasses can only be
    applied to ComplexTextStim instances
Example #32
''' Extractors that operate on AudioStim inputs. '''

from pliers.stimuli.audio import AudioStim
from pliers.stimuli.text import ComplexTextStim
from pliers.extractors.base import Extractor, ExtractorResult
from pliers.utils import attempt_to_import, verify_dependencies, listify
import numpy as np
from scipy import fft

librosa = attempt_to_import('librosa')


class AudioExtractor(Extractor):
    ''' Base Audio Extractor class; all subclasses can only be applied to
    audio. '''
    _input_type = AudioStim


class STFTAudioExtractor(AudioExtractor):
    ''' Short-time Fourier Transform extractor.

    Args:
        frame_size (float): The width of the frame/window to apply an FFT to,
            in seconds.
        hop_size (float): The step size to increment the window by on each
            iteration, in seconds (effectively, the sampling rate).
        freq_bins (list or int): The set of bins or frequency bands to extract
            power for. If an int is passed, this is the number of bins
            returned, with each bin spanning an equal range of frequencies.
            E.g., if bins=5 and the frequency spectrum runs from 0 to 20KHz,
            each bin will span 4KHz. If a list is passed, each element must be
Example #33
import logging
import os

from pliers.transformers import BatchTransformerMixin
from pliers.transformers.api import APITransformer
from pliers.utils import attempt_to_import, verify_dependencies

googleapiclient = attempt_to_import('googleapiclient', fromlist=['discovery'])
google_auth = attempt_to_import('google.oauth2',
                                'google_auth',
                                fromlist=['service_account'])

DISCOVERY_URL = 'https://{api}.googleapis.com/$discovery/rest?version={apiVersion}'


class GoogleAPITransformer(APITransformer):
    ''' Base GoogleAPITransformer class.

    Args:
      discovery_file (str): path to discovery file containing Google
        application credentials.
      api_version (str): API version to use.
      max_results (int): Max number of results per page.
      num_retries (int): Number of times to retry query on failure.
      rate_limit (int): The minimum number of seconds required between
            transform calls on this Transformer.
    '''

    _env_keys = 'GOOGLE_APPLICATION_CREDENTIALS'
    _log_attributes = ('discovery_file', 'api_version')
Example #34
''' Wit.ai API-based Converters '''

import logging
import os
from abc import abstractproperty
from pliers.stimuli.text import ComplexTextStim
from pliers.utils import attempt_to_import, verify_dependencies
from pliers.converters.audio import AudioToTextConverter
from pliers.transformers.api import APITransformer
from six.moves.urllib.request import Request, urlopen
from six.moves.urllib.error import HTTPError

sr = attempt_to_import('speech_recognition', 'sr')


class SpeechRecognitionAPIConverter(APITransformer, AudioToTextConverter):

    ''' Uses the SpeechRecognition library, which interacts with several APIs,
    like Google and Wit, to run speech-to-text transcription on an audio file.

    Args:
        api_key (str): API key. Must be passed explicitly or stored in
            the environment variable specified in the _env_keys field.
        rate_limit (int): The minimum number of seconds required between
            transform calls on this Transformer.
    '''

    _log_attributes = ('api_key', 'recognize_method')
    VERSION = '1.0'

    @abstractproperty
Example #35
'''
Extractors that interact with the Indico API.
'''

import os
from pliers.extractors.image import ImageExtractor
from pliers.extractors.text import TextExtractor
from pliers.extractors.base import Extractor, ExtractorResult
from pliers.transformers import BatchTransformerMixin
from pliers.utils import (EnvironmentKeyMixin, attempt_to_import,
                          verify_dependencies)
import pandas as pd

indicoio = attempt_to_import('indicoio')


class IndicoAPIExtractor(BatchTransformerMixin, Extractor,
                         EnvironmentKeyMixin):
    ''' Base class for all Indico API Extractors

    Args:
        api_key (str): A valid API key for the Indico API. Only needs to be
            passed the first time the extractor is initialized.
        models (list): The names of the Indico models to use.
    '''

    _log_attributes = ('models', 'model_names')
    _input_type = ()
    _batch_size = 20
    _env_keys = 'INDICO_APP_KEY'
    VERSION = '1.0'
Example #36
from os import path
import sys
import logging

import numpy as np
from scipy import fft
import pandas as pd

from pliers.stimuli.audio import AudioStim
from pliers.stimuli.text import ComplexTextStim
from pliers.extractors.base import Extractor, ExtractorResult
from pliers.utils import attempt_to_import, verify_dependencies, listify
from pliers.support.exceptions import MissingDependencyError
from pliers.support.setup_yamnet import YAMNET_PATH

librosa = attempt_to_import('librosa')
tf = attempt_to_import('tensorflow')


class AudioExtractor(Extractor):
    ''' Base Audio Extractor class; all subclasses can only be applied to
    audio. '''
    _input_type = AudioStim


class STFTAudioExtractor(AudioExtractor):
    ''' Short-time Fourier Transform extractor.

    Args:
        frame_size (float): The width of the frame/window to apply an FFT to,
            in seconds.
Example #37
import pandas as pd
import numpy as np
from pliers.utils import attempt_to_import, verify_dependencies
import matplotlib.pyplot as plt
from scipy.spatial.distance import mahalanobis
from numpy.linalg import LinAlgError

sns = attempt_to_import('seaborn')


def correlation_matrix(df):
    '''
    Returns a pandas DataFrame with the pair-wise correlations of the columns.

    Args:
        df: pandas DataFrame with columns to run diagnostics on
    '''
    columns = df.columns.tolist()
    corr = pd.DataFrame(np.corrcoef(df, rowvar=0),
                        columns=columns,
                        index=columns)
    return corr


def eigenvalues(df):
    '''
    Returns a pandas Series with eigenvalues of the correlation matrix.

    Args:
        df: pandas DataFrame with columns to run diagnostics on
    '''
Example #38
''' Contains sklearn-compatible wrappers for pliers. '''

from pliers.extractors import Extractor, merge_results
from pliers.transformers import get_transformer
from pliers.utils import attempt_to_import
from six import string_types

sklearn = attempt_to_import('sklearn')
if sklearn:
    class SklearnBase(sklearn.base.TransformerMixin,
                      sklearn.base.BaseEstimator):
        pass
else:
    class SklearnBase():
        pass


class PliersTransformer(SklearnBase):

    ''' Simple wrapper for using pliers within a sklearn workflow.

    Args:
        transformer (Graph or Transformer): Pliers object to execute. Can
            either be a Graph with several transformers chained or a single
            transformer.
    '''

    def __init__(self, transformer):
        if isinstance(transformer, string_types):
            self.transformer = get_transformer(transformer)
        else: