def __init__(self, min_df=2, max_per=1.0, binarize=False, transform=None, replace_num='#',
             source=None, subdir=None, pseudotype=None, splits_file=None, stage='training'):
     """Initialise the 'property' count-feature extractor.

     Every argument is handed on unchanged, by keyword, to
     ``FeatureExtractorCounts.__init__``; this constructor only fixes the
     extractor name ('property') and the feature prefix ('_p_').
     """
     base_kwargs = dict(
         name='property',
         prefix='_p_',
         min_df=min_df,
         max_per=max_per,
         binarize=binarize,
         transform=transform,
         source=source,
         subdir=subdir,
         pseudotype=pseudotype,
         splits_file=splits_file,
         replace_num=replace_num,
         stage=stage,
     )
     FeatureExtractorCounts.__init__(self, **base_kwargs)
 def __init__(self, test_fold=0, dev_subfold=None):
     """Initialise the 'dataset' count-feature extractor.

     The extractor name ('dataset') and prefix ('_d_') are passed
     positionally to ``FeatureExtractorCounts.__init__``; OOV handling is
     switched on, the document threshold is fixed at 1 and the counts are
     binarized. Only the fold selection comes from the caller.
     """
     fixed_options = {
         'add_oov': True,
         'min_doc_threshold': 1,
         'binarize': True,
     }
     FeatureExtractorCounts.__init__(self, 'dataset', '_d_',
                                     test_fold=test_fold,
                                     dev_subfold=dev_subfold,
                                     **fixed_options)
 def __init__(self, n=1, min_df=2, max_per=1.0, binarize=False, transform=None, replace_num='#',
              source=None, subdir=None, pseudotype=None, splits_file=None, stage='training',
              lower=True, scale_factor=None):
     """Initialise the 'ngrams' count-feature extractor.

     ``n`` is converted with ``int()`` and stored as ``self.n``. The feature
     prefix ('_n<n>_') and the suffix (',n=<n>') are both derived from that
     converted value. All remaining arguments are forwarded by keyword to
     ``FeatureExtractorCounts.__init__``.

     Raises whatever ``int(n)`` raises when ``n`` is not convertible.
     """
     name = 'ngrams'
     self.n = int(n)
     # Build the prefix from the normalised value, not the raw argument:
     # with str(n) an input such as 2.0 or ' 2' produced a prefix
     # ('_n2.0_', '_n 2_') that disagreed with the suffix (',n=2').
     prefix = '_n' + str(self.n) + '_'
     suffix = ',n=' + str(self.n)
     FeatureExtractorCounts.__init__(self, name=name, prefix=prefix, min_df=min_df, max_per=max_per,
                                     binarize=binarize, transform=transform, source=source, subdir=subdir,
                                     pseudotype=pseudotype, splits_file=splits_file, replace_num=replace_num,
                                     stage=stage, suffix=suffix, lower=lower, scale_factor=scale_factor)
 def __init__(self, test_fold=0, dev_subfold=None, binarize=False, clusters=''):
     """Initialise the 'brownclusters' count-feature extractor.

     The feature prefix embeds the ``clusters`` string ('_bc-<clusters>_').
     After the base initialiser has run, ``clusters`` is recorded in
     ``self.params`` and the directory name is extended via
     ``FeatureExtractorCountsBrownClusters.extend_dirname``.
     """
     feature_prefix = '_bc-' + clusters + '_'
     base_options = dict(
         add_oov=True,
         min_doc_threshold=1,
         binarize=binarize,
         test_fold=test_fold,
         dev_subfold=dev_subfold,
     )
     FeatureExtractorCounts.__init__(self, 'brownclusters', feature_prefix, **base_options)

     # The params entry is written before extend_dirname is called, as in
     # the original ordering.
     self.params['clusters'] = clusters
     FeatureExtractorCountsBrownClusters.extend_dirname(self)
 def __init__(self, test_fold=0, dev_subfold=None, n=1, min_doc_threshold=1, binarize=True,
              concat_oov_counts=False, append_dataset=False, source='normalized'):
     """Initialise the fold-based 'ngrams' count-feature extractor.

     ``n`` is converted with ``int()`` once, up front, and that value is used
     both for the feature prefix ('_n<n>_') and for ``self.params['n']``.
     ``concat_oov_counts`` and ``append_dataset`` are passed through
     ``ast.literal_eval(str(...))``, so both real booleans and their string
     spellings ('True' / 'False') are accepted. ``source`` is stored as
     given. Finally the directory name is extended via
     ``FeatureExtractorCountsNgrams.extend_dirname``.

     Raises whatever ``int(n)`` or ``ast.literal_eval`` raise on values they
     cannot interpret.
     """
     name = 'ngrams'
     # Normalise before building the prefix so the prefix and params['n']
     # cannot disagree (str(n) on the raw argument gave e.g. '_n2.0_' while
     # params['n'] held 2). Doing it first also rejects a bad n before the
     # base initialiser runs.
     n = int(n)
     prefix = '_n' + str(n) + '_'
     FeatureExtractorCounts.__init__(self, name, prefix, add_oov=True,
                                     min_doc_threshold=min_doc_threshold,
                                     binarize=binarize,
                                     test_fold=test_fold,
                                     dev_subfold=dev_subfold)
     self.params['n'] = n
     self.params['concat_oov_counts'] = ast.literal_eval(str(concat_oov_counts))
     self.params['append_dataset'] = ast.literal_eval(str(append_dataset))
     self.params['source'] = source
     FeatureExtractorCountsNgrams.extend_dirname(self)
    def __init__(self, min_df=2, max_per=1.0, binarize=True, transform=None, replace_num='#',
                 subdir=None, source=None, splits_file=None, pseudotype=None, stage='training',
                 lower=True, scale_factor=None, shorten=None):
        """Initialise the 'list' count-feature extractor.

        ``shorten`` is converted with ``int()`` when given and stored as
        ``self.shorten``; when it is None the attribute is set to None. A
        ',shorten=<value>' suffix is passed to the base initialiser only when
        ``shorten`` was given. When ``subdir`` is 'brown', ``replace_num`` is
        overridden with None before being forwarded. All other arguments go
        to ``FeatureExtractorCounts.__init__`` unchanged.

        Raises whatever ``int(shorten)`` raises for a non-convertible value.
        """
        name = 'list'
        prefix = '_l_'

        # Always define the attribute: previously it was only assigned when
        # `shorten` was supplied, so any later read of self.shorten on a
        # default-constructed instance raised AttributeError.
        self.shorten = None if shorten is None else int(shorten)

        if subdir == 'brown':
            replace_num = None

        suffix = ''
        if self.shorten is not None:
            suffix += ',shorten=' + str(self.shorten)

        FeatureExtractorCounts.__init__(self, name=name, prefix=prefix, min_df=min_df, max_per=max_per,
                                        binarize=binarize, transform=transform, replace_num=replace_num,
                                        source=source, subdir=subdir, pseudotype=pseudotype, splits_file=splits_file,
                                        stage=stage, lower=lower, suffix=suffix, scale_factor=scale_factor)
 def from_files(cls, dirname):
     """Create an instance from the settings encoded in ``dirname``.

     ``FeatureExtractorCounts.parse_dirname`` splits the directory name into
     eight fields. Exactly one extra field is expected and is read as the
     n-gram order; the parsed name must be 'ngrams'. Only the fold settings,
     ``n``, ``min_doc_threshold`` and ``binarize`` are passed on to ``cls``;
     the parsed feature type and add_oov flag are not used.

     Raises AssertionError if either expectation above is not met.
     """
     parsed = FeatureExtractorCounts.parse_dirname(dirname)
     (name, _feature_type, test_fold, dev_subfold,
      _add_oov, min_doc_threshold, binarize, extra) = parsed

     assert len(extra) == 1
     ngram_order = int(extra[0])
     assert name == 'ngrams'

     ctor_kwargs = {
         'test_fold': test_fold,
         'dev_subfold': dev_subfold,
         'n': ngram_order,
         'min_doc_threshold': min_doc_threshold,
         'binarize': binarize,
     }
     return cls(**ctor_kwargs)