def load(self, dataflow):
    """
    Configure engine according to the given dataflow.

    :param dataflow: dataflow object or filename of a dataflow file.
    :type dataflow: :py:class:`DataFlow` or string
    :return: True on success, False on fail.
    """
    if type(dataflow) is str:
        df = DataFlow()
        if df.load(dataflow):
            dataflow = df
    if type(dataflow) is DataFlow:
        return bool(yc.engine_load(self.ptr, dataflow.ptr))
    raise TypeError('dataflow parameter must be a DataFlow object '
                    'or a dataflow filename!')
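# Usage sketch (not part of the original source): how load() might be called,
# assuming the method above belongs to yaafelib's Engine class. The feature
# definition and sample rate below are illustrative.
from yaafelib import Engine, FeaturePlan

fp = FeaturePlan(sample_rate=16000)
fp.addFeature('mfcc: MFCC blockSize=512 stepSize=256')

engine = Engine()
ok = engine.load(fp.getDataFlow())  # load() also accepts a dataflow filename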
import logging
from datetime import timedelta

from dataflow import DataFlow
from workers import head_url, node_url, newfile_filter, ignore_filter, file_to_db

logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    filename='/var/log/homulili/scanner.log',
    filemode='w',
)
logger = logging.getLogger(__name__)

master_update_interval = timedelta(minutes=30).total_seconds()
madokami_stage_interval = timedelta(seconds=10).total_seconds()

logger.info('Starting scanner')

df = DataFlow()
x = df.rate_limited_node(target=head_url, interval=master_update_interval)
x = df.rate_limited_node(input=x.out, target=node_url,
                         interval=madokami_stage_interval)
x = df.node(input=x.out, target=newfile_filter)
x = df.node(input=x.out, target=ignore_filter)
x = df.node(input=x.out, num_outputs=0, target=file_to_db)
logger.debug('Scanner graph initialized')

df.run()
class FeaturePlan(object):
    """
    FeaturePlan is a collection of features to extract, configured for a
    specific sample rate.

    :param sample_rate: analysis samplerate
    :param normalize: signal maximum normalization, in ]0,1],
        or `None` to skip normalization.

    This collection can be loaded from a file using the
    :py:meth:`loadFeaturePlan` method, or built by adding features with the
    :py:meth:`addFeature` method. Then, the :py:meth:`getDataFlow` method
    retrieves the corresponding :py:class:`DataFlow` object.

    .. doctest::

        >>> fp = FeaturePlan(sample_rate=16000)
        >>> fp.addFeature('mfcc: MFCC blockSize=512 stepSize=256')
        True
        >>> fp.addFeature('mfcc_d1: MFCC blockSize=512 stepSize=256 > Derivate DOrder=1')
        True
        >>> fp.addFeature('mfcc_d2: MFCC blockSize=512 stepSize=256 > Derivate DOrder=2')
        True
        >>> df = fp.getDataFlow()
        >>> df.display()
        ...
    """

    def __init__(self, sample_rate=44100, normalize=None, resample=False):
        if type(normalize) == int:
            normalize = '%i' % normalize
        elif type(normalize) == float:
            normalize = '%f' % normalize
        elif normalize and type(normalize) != str:
            normalize = str(normalize)
        self.features = {}
        self.resample = resample
        self.sample_rate = sample_rate
        self.audio_params = {
            'SampleRate': str(sample_rate),
            'Resample': 'yes' if resample else 'no'
        }
        if normalize:
            self.audio_params['RemoveMean'] = 'yes'
            self.audio_params['ScaleMax'] = normalize
        self.out_attrs = {
            'normalize': normalize or '-1',
            'version': yaafecore.getYaafeVersion(),
            'samplerate': str(sample_rate),
            'resample': 'yes' if resample else 'no'
        }
        self.dataflow = DataFlow()

    def addFeature(self, definition):
        """
        Add a feature defined according to the
        :ref:`feature definition syntax <featplan>`.

        :param definition: feature definition
        :type definition: string
        :rtype: True on success, False on fail.
        """
        data = definition.split(':')
        if not len(data) == 2:
            print 'Syntax error in "%s"' % definition
            return False
        name, featdef = data
        dataflow = DataFlow()
        inputNode = dataflow.createInput('audio', self.audio_params)
        if featdef.strip():
            for s in featdef.split('>'):
                s = s.strip()
                bb = s.split(' ')
                feat = AudioFeatureFactory.get_feature(bb[0])
                if not feat:
                    return False
                params = {}
                for d in bb[1:]:
                    if len(d) == 0:
                        continue
                    if '=' not in d:
                        print 'Invalid feature parameter "%s"' % d
                        return False
                    dd = d.split('=')
                    if not len(dd) == 2:
                        print 'Syntax error in feature parameter "%s"' % d
                        return False
                    params[dd[0]] = dd[1]
                dataflow.append(feat.get_dataflow(params, self.sample_rate))
        fNode = dataflow.finalNodes()[0]
        feat_attrs = self.out_attrs.copy()
        feat_attrs['yaafedefinition'] = featdef.strip()
        outNode = dataflow.createOutput(name, feat_attrs)
        dataflow.link(fNode, '', outNode, '')
        self.dataflow.merge(dataflow)
        return True

    def loadFeaturePlan(self, filename):
        """
        Load a feature extraction plan from a file. The file must be a text
        file, where each line defines a feature (see
        :ref:`feature definition syntax <feat-def-format>`).

        :rtype: True on success, False on fail.
        """
        fin = open(filename, 'r')
        for line in fin:
            if line.startswith('#'):
                continue
            line = line.strip()
            if line:
                if not self.addFeature(line):
                    return False
        fin.close()
        return True

    def getDataFlow(self):
        """
        Get the :py:class:`DataFlow` object representing how to extract
        defined features.

        :rtype: DataFlow
        """
        return self.dataflow
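# Usage sketch (not part of the original source): building a FeaturePlan from
# a text file via loadFeaturePlan(). The file name and its contents below are
# illustrative; each non-comment line uses the same
# 'name: Feature param=value [> Transform ...]' syntax that addFeature() parses.
#
# featureplan.txt:
#     # lines starting with '#' are skipped
#     mfcc: MFCC blockSize=512 stepSize=256
#     mfcc_d1: MFCC blockSize=512 stepSize=256 > Derivate DOrder=1
fp = FeaturePlan(sample_rate=16000, normalize=0.98)
if fp.loadFeaturePlan('featureplan.txt'):
    fp.getDataFlow().display()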
import logging
from datetime import timedelta

from dataflow import DataFlow
from workers import get_manga_ids, urls_from_db, name_file, download_file, update_db

logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    filename='/var/log/homulili/scraper.log',
    filemode='w',
)
logger = logging.getLogger(__name__)

master_interval = timedelta(minutes=5).total_seconds()
madokami_file_interval = timedelta(seconds=30).total_seconds()

logger.info('Starting scraper')

df = DataFlow()
x = df.rate_limited_node(interval=master_interval, target=get_manga_ids)
x = df.node(input=x.out, target=urls_from_db)
x = df.node(input=x.out, target=name_file)
x = df.rate_limited_node(interval=madokami_file_interval, input=x.out,
                         target=download_file)
x = df.node(input=x.out, target=update_db, num_outputs=0)
logger.debug('Scraper graph initialized')

df.run()