Example #1
0
 def load(self, dataflow):
     """
         Configure engine according to the given dataflow.

         :param dataflow: dataflow object or filename of a dataflow file.
         :type dataflow: :py:class:`DataFlow` or string
         :return: True on success, False on fail.
         :raises TypeError: if `dataflow` is neither a :py:class:`DataFlow`
             nor a string from which a :py:class:`DataFlow` could be loaded.
     """
     if isinstance(dataflow, str):
         # A string is treated as a dataflow filename. It is only replaced
         # by the loaded object when DataFlow.load() reports success.
         loaded = DataFlow()
         if loaded.load(dataflow):
             dataflow = loaded
     if isinstance(dataflow, DataFlow):
         # Coerce whatever the binding returns into a real bool.
         return bool(yc.engine_load(self.ptr, dataflow.ptr))
     # Reached for unsupported argument types and for filenames whose
     # load failed (the argument is then still a string).
     raise TypeError('dataflow parameter must be a DataFlow object or dataflow filename !')
Example #2
0
 def load(self, dataflow):
     """
         Configure engine according to the given dataflow.

         :param dataflow: dataflow object or filename of a dataflow file.
         :type dataflow: :py:class:`DataFlow` or string
         :return: True on success, False on fail.
         :raises TypeError: if `dataflow` is neither a :py:class:`DataFlow`
             nor a string from which a :py:class:`DataFlow` could be loaded.
     """
     if isinstance(dataflow, str):
         # Strings are interpreted as the path of a dataflow file; keep the
         # original string when loading does not succeed.
         from_file = DataFlow()
         if from_file.load(dataflow):
             dataflow = from_file
     if isinstance(dataflow, DataFlow):
         # bool() replaces the old "x and True or False" idiom.
         return bool(yc.engine_load(self.ptr, dataflow.ptr))
     # Unsupported type, or a filename that could not be loaded.
     raise TypeError(
         'dataflow parameter must be a DataFlow object or dataflow filename !'
     )
 def addFeature(self, definition):
     """
         Add a feature defined according the :ref:`feature definition syntax <featplan>`.

         The definition is parsed as ``name: Step key=value ... > Step ...``:
         exactly one ``:`` separates the output name from a ``>``-separated
         chain of steps, each step being a feature name followed by
         space-separated ``key=value`` parameters.

         A message is printed for syntax errors. Nothing is merged into
         ``self.dataflow`` unless the whole definition is accepted.

         :param definition: feature definition
         :type definition: string
         :rtype: True on success, False on fail.
     """
     data = definition.split(':')
     if len(data) != 2:
         # print(...) with a single argument behaves the same on Python 2 and 3.
         print('Syntax error in "%s"' % definition)
         return False
     name, featdef = data
     dataflow = DataFlow()
     # The returned node was never used; the call itself is kept because it
     # presumably registers the audio input on `dataflow` (confirm in DataFlow).
     dataflow.createInput('audio', self.audio_params)
     if featdef.strip():
         for step in featdef.split('>'):
             tokens = step.strip().split(' ')
             feat = AudioFeatureFactory.get_feature(tokens[0])
             if not feat:
                 return False
             params = {}
             for token in tokens[1:]:
                 if not token:
                     # Consecutive spaces produce empty tokens; ignore them.
                     continue
                 if '=' not in token:
                     print('Invalid feature parameter "%s"' % token)
                     return False
                 pair = token.split('=')
                 if len(pair) != 2:
                     print('Syntax error in feature parameter "%s"' % token)
                     return False
                 params[pair[0]] = pair[1]
             dataflow.append(feat.get_dataflow(params, self.sample_rate))
     # Attach an output node, named after the feature, to the first final node.
     fNode = dataflow.finalNodes()[0]
     feat_attrs = self.out_attrs.copy()
     feat_attrs['yaafedefinition'] = featdef.strip()
     outNode = dataflow.createOutput(name, feat_attrs)
     dataflow.link(fNode, '', outNode, '')
     self.dataflow.merge(dataflow)
     return True
 def __init__(self, sample_rate=44100, normalize=None, resample=False):
     """
         Initialise an empty plan.

         :param sample_rate: stored as given, and written as a string into
             the audio parameters and the output attributes.
         :param normalize: converted to a string (``%i`` for int, ``%f`` for
             float, ``str()`` for other truthy non-string values); when the
             result is truthy it enables 'RemoveMean' and is used as 'ScaleMax'.
         :param resample: truthiness selects 'yes' or 'no' for the resample
             entries.
     """
     norm_type = type(normalize)
     if norm_type == int:
         normalize = '%i' % normalize
     elif norm_type == float:
         normalize = '%f' % normalize
     elif normalize and norm_type != str:
         normalize = str(normalize)

     rate_text = str(sample_rate)
     resample_flag = 'yes' if resample else 'no'

     self.features = {}
     self.resample = resample
     self.sample_rate = sample_rate

     audio_params = {'SampleRate': rate_text, 'Resample': resample_flag}
     if normalize:
         audio_params.update({'RemoveMean': 'yes', 'ScaleMax': normalize})
     self.audio_params = audio_params

     # '-1' is recorded when no normalization value is in effect.
     self.out_attrs = dict(
         normalize=normalize or '-1',
         version=yaafecore.getYaafeVersion(),
         samplerate=rate_text,
         resample=resample_flag,
     )
     self.dataflow = DataFlow()
 def addFeature(self, definition):
     """
         Add a feature defined according the :ref:`feature definition syntax <featplan>`.

         The definition is parsed as ``name: Step key=value ... > Step ...``:
         exactly one ``:`` separates the output name from a ``>``-separated
         chain of steps, each step being a feature name followed by
         space-separated ``key=value`` parameters.

         A message is printed for syntax errors. Nothing is merged into
         ``self.dataflow`` unless the whole definition is accepted.

         :param definition: feature definition
         :type definition: string
         :rtype: True on success, False on fail.
     """
     data = definition.split(":")
     if len(data) != 2:
         # Parenthesised single-argument print works on Python 2 and 3 alike.
         print('Syntax error in "%s"' % definition)
         return False
     name, featdef = data
     dataflow = DataFlow()
     # Return value intentionally dropped (it was an unused local); the call
     # presumably registers the audio input on `dataflow` -- confirm in DataFlow.
     dataflow.createInput("audio", self.audio_params)
     if featdef.strip():
         for step in featdef.split(">"):
             tokens = step.strip().split(" ")
             feat = AudioFeatureFactory.get_feature(tokens[0])
             if not feat:
                 return False
             params = {}
             for token in tokens[1:]:
                 if not token:
                     # Runs of spaces yield empty tokens; skip them.
                     continue
                 if "=" not in token:
                     print('Invalid feature parameter "%s"' % token)
                     return False
                 pair = token.split("=")
                 if len(pair) != 2:
                     print('Syntax error in feature parameter "%s"' % token)
                     return False
                 params[pair[0]] = pair[1]
             dataflow.append(feat.get_dataflow(params, self.sample_rate))
     # Attach an output node, named after the feature, to the first final node.
     fNode = dataflow.finalNodes()[0]
     feat_attrs = self.out_attrs.copy()
     feat_attrs["yaafedefinition"] = featdef.strip()
     outNode = dataflow.createOutput(name, feat_attrs)
     dataflow.link(fNode, "", outNode, "")
     self.dataflow.merge(dataflow)
     return True
 def __init__(self, sample_rate=44100, normalize=None, resample=False):
     """
         Initialise an empty plan.

         :param sample_rate: kept as given and also stored, as a string, in
             the audio parameters and output attributes.
         :param normalize: turned into a string (``%i`` for int, ``%f`` for
             float, ``str()`` for other truthy non-string values); a truthy
             result switches on 'RemoveMean' and becomes 'ScaleMax'.
         :param resample: truthiness selects "yes" or "no" for the resample
             entries.
     """
     given_type = type(normalize)
     if given_type == int:
         normalize = "%i" % normalize
     elif given_type == float:
         normalize = "%f" % normalize
     elif normalize and given_type != str:
         normalize = str(normalize)

     yes_no = "yes" if resample else "no"
     rate_as_text = str(sample_rate)

     self.features = {}
     self.resample = resample
     self.sample_rate = sample_rate

     self.audio_params = {}
     self.audio_params["SampleRate"] = rate_as_text
     self.audio_params["Resample"] = yes_no
     if normalize:
         self.audio_params["RemoveMean"] = "yes"
         self.audio_params["ScaleMax"] = normalize

     # "-1" is recorded when no normalization value is in effect.
     normalize_attr = normalize or "-1"
     self.out_attrs = {
         "normalize": normalize_attr,
         "version": yaafecore.getYaafeVersion(),
         "samplerate": rate_as_text,
         "resample": yes_no,
     }
     self.dataflow = DataFlow()
Example #7
0
import logging
from datetime import timedelta

from dataflow import DataFlow
from workers import head_url, node_url, newfile_filter, ignore_filter, file_to_db

# Log everything from DEBUG upwards to the scanner log file; filemode='w'
# truncates that file every time the script starts.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    filename='/var/log/homulili/scanner.log',
    filemode='w',
)
logger = logging.getLogger(__name__)

# Intervals, in seconds, handed to the two rate-limited stages below.
master_update_interval = timedelta(minutes=30).total_seconds()
madokami_stage_interval = timedelta(seconds=10).total_seconds()

logger.info('Starting scanner')
# Build a linear pipeline: every stage takes the previous stage's `out`
# as its input, and the last stage declares no outputs.
df = DataFlow()
x = df.rate_limited_node(target=head_url, interval=master_update_interval)
x = df.rate_limited_node(input=x.out,
                         target=node_url,
                         interval=madokami_stage_interval)
x = df.node(input=x.out, target=newfile_filter)
x = df.node(input=x.out, target=ignore_filter)
x = df.node(input=x.out, num_outputs=0, target=file_to_db)

logger.debug('Scanner graph initialized')

df.run()
class FeaturePlan(object):
    """
        FeaturePlan is a collection of features to extract, configured for a
        specific sample rate.

        :param sample_rate: analysis samplerate
        :param normalize: signal maximum normalization, in ]0,1], or `None` to skip normalization.
        :param resample: when true, the 'Resample' audio parameter is set to 'yes' (default 'no').

        This collection can be loaded from a file using the :py:meth:`loadFeaturePlan` method,
        or built by adding features with the :py:meth:`addFeature` method.

        Then, the :py:meth:`getDataFlow` method retrieves the corresponding :py:class:`DataFlow` object.

        .. doctest::

            >>> fp = FeaturePlan(sample_rate=16000)
            >>> fp.addFeature('mfcc: MFCC blockSize=512 stepSize=256')
            True
            >>> fp.addFeature('mfcc_d1: MFCC blockSize=512 stepSize=256 > Derivate DOrder=1')
            True
            >>> fp.addFeature('mfcc_d2: MFCC blockSize=512 stepSize=256 > Derivate DOrder=2')
            True
            >>> df = fp.getDataFlow()
            >>> df.display()
            ...
    """
    def __init__(self, sample_rate=44100, normalize=None, resample=False):
        # Exact type comparisons are deliberate: with isinstance() a bool
        # would take the int branch instead of the str() fallback.
        if type(normalize) == int:
            normalize = '%i' % normalize
        elif type(normalize) == float:
            normalize = '%f' % normalize
        elif normalize and type(normalize) != str:
            normalize = str(normalize)
        self.features = {}
        self.resample = resample
        self.sample_rate = sample_rate
        self.audio_params = {
            'SampleRate': str(sample_rate),
            'Resample': 'yes' if resample else 'no'
        }
        if normalize:
            self.audio_params['RemoveMean'] = 'yes'
            self.audio_params['ScaleMax'] = normalize
        # Attributes copied onto every output node created by addFeature().
        self.out_attrs = {
            'normalize': normalize or '-1',
            'version': yaafecore.getYaafeVersion(),
            'samplerate': str(sample_rate),
            'resample': 'yes' if resample else 'no'
        }
        self.dataflow = DataFlow()

    @staticmethod
    def _parseParams(tokens):
        """
            Turn ``key=value`` tokens into a dict, skipping empty tokens.

            Prints a message and returns `None` on the first malformed token.
        """
        params = {}
        for token in tokens:
            if not token:
                # Consecutive spaces in the definition yield empty tokens.
                continue
            if '=' not in token:
                print('Invalid feature parameter "%s"' % token)
                return None
            pair = token.split('=')
            if len(pair) != 2:
                print('Syntax error in feature parameter "%s"' % token)
                return None
            params[pair[0]] = pair[1]
        return params

    def addFeature(self, definition):
        """
            Add a feature defined according the :ref:`feature definition syntax <featplan>`.

            The definition is parsed as ``name: Step key=value ... > Step ...``.
            A message is printed for syntax errors, and nothing is merged
            into the plan unless the whole definition is accepted.

            :param definition: feature definition
            :type definition: string
            :rtype: True on success, False on fail.
        """
        data = definition.split(':')
        if len(data) != 2:
            # print(...) with one argument behaves the same on Python 2 and 3.
            print('Syntax error in "%s"' % definition)
            return False
        name, featdef = data
        dataflow = DataFlow()
        # Return value unused; the call presumably registers the audio input
        # on `dataflow` (confirm in DataFlow).
        dataflow.createInput('audio', self.audio_params)
        if featdef.strip():
            for step in featdef.split('>'):
                tokens = step.strip().split(' ')
                feat = AudioFeatureFactory.get_feature(tokens[0])
                if not feat:
                    return False
                params = self._parseParams(tokens[1:])
                if params is None:
                    # An empty dict is valid, so test against None explicitly.
                    return False
                dataflow.append(feat.get_dataflow(params, self.sample_rate))
        # Attach an output node, named after the feature, to the first final node.
        fNode = dataflow.finalNodes()[0]
        feat_attrs = self.out_attrs.copy()
        feat_attrs['yaafedefinition'] = featdef.strip()
        outNode = dataflow.createOutput(name, feat_attrs)
        dataflow.link(fNode, '', outNode, '')
        self.dataflow.merge(dataflow)
        return True

    def loadFeaturePlan(self, filename):
        """
            Loads feature extraction plan from a file. The file must be a text file,
            where each line defines a feature (see :ref:`feature definition syntax <feat-def-format>`).

            Lines starting with ``#`` and blank lines are skipped. Loading
            stops at the first line rejected by :py:meth:`addFeature`;
            features added before that line remain in the plan.

            :rtype: True on success, False on fail.
        """
        # `with` closes the file on the early return and on exceptions too.
        with open(filename, 'r') as fin:
            for line in fin:
                if line.startswith('#'):
                    continue
                line = line.strip()
                if line and not self.addFeature(line):
                    return False
        return True

    def getDataFlow(self):
        """
            Get the :py:class:`DataFlow` object representing how to extract defined features.

            :rtype: DataFlow
        """
        return self.dataflow
class FeaturePlan(object):
    """
        FeaturePlan is a collection of features to extract, configured for a
        specific sample rate.

        :param sample_rate: analysis samplerate
        :param normalize: signal maximum normalization, in ]0,1], or `None` to skip normalization.
        :param resample: when true, the "Resample" audio parameter is set to "yes" (default "no").

        This collection can be loaded from a file using the :py:meth:`loadFeaturePlan` method,
        or built by adding features with the :py:meth:`addFeature` method.

        Then, the :py:meth:`getDataFlow` method retrieves the corresponding :py:class:`DataFlow` object.

        .. doctest::

            >>> fp = FeaturePlan(sample_rate=16000)
            >>> fp.addFeature('mfcc: MFCC blockSize=512 stepSize=256')
            True
            >>> fp.addFeature('mfcc_d1: MFCC blockSize=512 stepSize=256 > Derivate DOrder=1')
            True
            >>> fp.addFeature('mfcc_d2: MFCC blockSize=512 stepSize=256 > Derivate DOrder=2')
            True
            >>> df = fp.getDataFlow()
            >>> df.display()
            ...
    """

    def __init__(self, sample_rate=44100, normalize=None, resample=False):
        # Exact type comparisons are deliberate: with isinstance() a bool
        # would take the int branch instead of the str() fallback.
        if type(normalize) == int:
            normalize = "%i" % normalize
        elif type(normalize) == float:
            normalize = "%f" % normalize
        elif normalize and type(normalize) != str:
            normalize = str(normalize)
        self.features = {}
        self.resample = resample
        self.sample_rate = sample_rate
        self.audio_params = {"SampleRate": str(sample_rate), "Resample": "yes" if resample else "no"}
        if normalize:
            self.audio_params["RemoveMean"] = "yes"
            self.audio_params["ScaleMax"] = normalize
        # Attributes copied onto every output node created by addFeature().
        self.out_attrs = {
            "normalize": normalize or "-1",
            "version": yaafecore.getYaafeVersion(),
            "samplerate": str(sample_rate),
            "resample": "yes" if resample else "no",
        }
        self.dataflow = DataFlow()

    @staticmethod
    def _parseParams(tokens):
        """
            Turn ``key=value`` tokens into a dict, skipping empty tokens.

            Prints a message and returns `None` on the first malformed token.
        """
        params = {}
        for token in tokens:
            if not token:
                # Runs of spaces in the definition yield empty tokens.
                continue
            if "=" not in token:
                print('Invalid feature parameter "%s"' % token)
                return None
            pair = token.split("=")
            if len(pair) != 2:
                print('Syntax error in feature parameter "%s"' % token)
                return None
            params[pair[0]] = pair[1]
        return params

    def addFeature(self, definition):
        """
            Add a feature defined according the :ref:`feature definition syntax <featplan>`.

            The definition is parsed as ``name: Step key=value ... > Step ...``.
            A message is printed for syntax errors, and nothing is merged
            into the plan unless the whole definition is accepted.

            :param definition: feature definition
            :type definition: string
            :rtype: True on success, False on fail.
        """
        data = definition.split(":")
        if len(data) != 2:
            # Parenthesised single-argument print works on Python 2 and 3 alike.
            print('Syntax error in "%s"' % definition)
            return False
        name, featdef = data
        dataflow = DataFlow()
        # Return value unused; the call presumably registers the audio input
        # on `dataflow` (confirm in DataFlow).
        dataflow.createInput("audio", self.audio_params)
        if featdef.strip():
            for step in featdef.split(">"):
                tokens = step.strip().split(" ")
                feat = AudioFeatureFactory.get_feature(tokens[0])
                if not feat:
                    return False
                params = self._parseParams(tokens[1:])
                if params is None:
                    # An empty dict is valid, so test against None explicitly.
                    return False
                dataflow.append(feat.get_dataflow(params, self.sample_rate))
        # Attach an output node, named after the feature, to the first final node.
        fNode = dataflow.finalNodes()[0]
        feat_attrs = self.out_attrs.copy()
        feat_attrs["yaafedefinition"] = featdef.strip()
        outNode = dataflow.createOutput(name, feat_attrs)
        dataflow.link(fNode, "", outNode, "")
        self.dataflow.merge(dataflow)
        return True

    def loadFeaturePlan(self, filename):
        """
            Loads feature extraction plan from a file. The file must be a text file,
            where each line defines a feature (see :ref:`feature definition syntax <feat-def-format>`).

            Lines starting with ``#`` and blank lines are skipped. Loading
            stops at the first line rejected by :py:meth:`addFeature`;
            features added before that line remain in the plan.

            :rtype: True on success, False on fail.
        """
        # `with` closes the file on the early return and on exceptions too.
        with open(filename, "r") as fin:
            for line in fin:
                if line.startswith("#"):
                    continue
                line = line.strip()
                if line and not self.addFeature(line):
                    return False
        return True

    def getDataFlow(self):
        """
            Get the :py:class:`DataFlow` object representing how to extract defined features.

            :rtype: DataFlow
        """
        return self.dataflow
Example #10
0
import logging
from datetime import timedelta

from dataflow import DataFlow
from workers import get_manga_ids, urls_from_db, name_file, download_file, update_db

# Log everything from DEBUG upwards to the scraper log file; filemode='w'
# truncates that file every time the script starts.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    filename='/var/log/homulili/scraper.log',
    filemode='w',
)
logger = logging.getLogger(__name__)

# Intervals, in seconds, handed to the two rate-limited stages below.
master_interval = timedelta(minutes=5).total_seconds()
madokami_file_interval = timedelta(seconds=30).total_seconds()

logger.info('Starting scraper')
# Build a linear pipeline: every stage takes the previous stage's `out`
# as its input, and the last stage declares no outputs.
df = DataFlow()
x = df.rate_limited_node(interval=master_interval, target=get_manga_ids)
x = df.node(input=x.out, target=urls_from_db)
x = df.node(input=x.out, target=name_file)
x = df.rate_limited_node(interval=madokami_file_interval,
                         input=x.out,
                         target=download_file)
x = df.node(input=x.out, target=update_db, num_outputs=0)

logger.debug('Scraper graph initialized')

df.run()