Example #1
    def __init__(self, string, type_of_sentence=None):
        self.type_of_sentence = type_of_sentence
        self.sentence = string
        self.nominal1 = None
        self.nominal2 = None

        # order matters: extract the tagged nominals before stripping the tags
        self.get_nominal_pair()
        self.remove_tags()
        self.feature_set = FeatureSet(self)
Example #2
	def __init__(self, samplerate, featureList, mp3dirs, k, times, run_before, euclidean):
		self.cluster = []
		self.SAMPLERATE = samplerate
		self.featureList = featureList
		self.num_features = len(featureList)
		self.mp3dirs = mp3dirs
		self.k = k
		self.times = times
		self.euclidean = euclidean

		# convert mp3s to feature vectors and build a brand-new FeatureSet
		# if this is a new dataset/feature combination
		if not run_before:
			self.mp3_to_feature_vectors()
			self.f = FeatureSet(self.featureList, False)
		# otherwise skip the conversion and load the cached features from file
		else:
			self.f = FeatureSet(self.featureList, True)
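For orientation, here is a minimal sketch of how this constructor might be called. The enclosing class name (`MusicClusterer`) and every argument value below are illustrative assumptions; none of them appear in the snippet itself.

# Hypothetical instantiation; class name and argument values are assumptions.
clusterer = MusicClusterer(
    samplerate=44100,                    # audio sample rate in Hz
    featureList=['mfcc', 'zcr'],         # features to extract per mp3
    mp3dirs=['mp3s/jazz', 'mp3s/rock'],  # directories to scan for mp3s
    k=2,                                 # number of clusters
    times=10,                            # number of k-means restarts
    run_before=False,                    # first run: extract and cache features
    euclidean=True)                      # use Euclidean distance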
Example #3
import re

class Sentence:

    RelatorList = ["because", "after", "since", " as "]

    def __init__(self, string, type_of_sentence=None):
        self.type_of_sentence = type_of_sentence
        self.sentence = string
        self.nominal1 = None
        self.nominal2 = None

        self.get_nominal_pair()
        # have to remove tags before tokenizing
        self.remove_tags()
        self.feature_set = FeatureSet(self)

    def get_nominal_pair(self):
        # find the tagged spans in the sentence; each match yields the full
        # "<tag>text</tag>" span plus the inner text as a second group
        match = re.findall(r"(<.*?>(.*?)</.*?>)", self.sentence)
        if len(match) >= 2:
            self.nominal1 = match[0][1]
            self.nominal2 = match[1][1]
        # keeps only the inner text of the two nominals, without the tags

    def remove_tags(self):
        self.sentence = self.sentence.replace("<e>", "")
        self.sentence = self.sentence.replace("</e>", "")

    def is_causal_from_dataset(self):
        # guard against the default type_of_sentence=None
        return (self.type_of_sentence is not None
                and "Cause-Effect" in self.type_of_sentence)

    def causal_features(self):
        features = {"HasRelator": self.feature_set.has_relator(),
                    "DiscreditRelator": self.feature_set.discredit_relator(),
                    "HasCausalVerb": self.feature_set.check_for_causal_verb(),
                    "HasCause": self.feature_set.check_for_cause_in_sentence()}
        return features
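A minimal usage sketch of the class above: the <e>...</e> tag format matches what remove_tags() strips, and the relation label follows the "Cause-Effect" convention that is_causal_from_dataset() checks. The sentence and label string are illustrative assumptions, and FeatureSet must be importable for the constructor to complete.

# Illustrative only: the sentence and label are made up to exercise the API.
s = Sentence("The <e>fire</e> started because of the <e>storm</e>.",
             type_of_sentence="Cause-Effect(e2,e1)")
print(s.nominal1)                  # "fire"
print(s.nominal2)                  # "storm"
print(s.is_causal_from_dataset())  # True
print(s.causal_features())         # boolean features computed by FeatureSet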
Example #4
from accurancyTools import *
from sklearn import ensemble
import itertools
import matplotlib.pyplot as plt
# FeatureSet.readFromFile is used below; this import path is an assumption,
# mirroring the local-module imports in the other examples
from FeatureSet import FeatureSet

trainingDataSetNames = ['Volume', 'CentroidNorm', 'Centroid', 'Perimeter', 'PseudoRadius', 'Complexity',
    'BoundingBox2Volume', 'BoundingBoxAspectRatio', 'IntensityMax', 'IntensityMean',
    'IntensityMin', 'IntensityStd', 'CloseMassRatio', 'IntensityHist', 'gaussianCoefficients',
    'gaussianGOV', 'Gradient', 'GradientOfMag']

senaryo1File = '../../noduledetectordata/ilastikoutput3/s1/s1.h5'
senaryo1LabelFile = '../../noduledetectordata/ilastikoutput3/s1/s1_labels.h5'
senaryo1BatchFile = '../../noduledetectordata/ilastikoutput3/s1/example_05.h5'
senaryo1BatchLabelFile = '../../noduledetectordata/ilastikoutput3/s1/labels_example_05.h5'
senaryo1 = {'all_features': senaryo1File, 'all_features_labels': senaryo1LabelFile, 'batch_features': senaryo1BatchFile, 'batch_features_labels': senaryo1BatchLabelFile}
senaryo1_train_sets = FeatureSet.readFromFile(senaryo1File, trainingDataSetNames, senaryo1LabelFile, 'labels')
senaryo1_batch_sets = FeatureSet.readFromFile(senaryo1BatchFile, trainingDataSetNames, senaryo1BatchLabelFile, 'labels')
senaryo1_sets = {'all_features_set': senaryo1_train_sets, 'batch_features_set': senaryo1_batch_sets, 'fileName': 'example05'}

senaryo2File = '../../noduledetectordata/ilastikoutput3/s2/s2.h5'
senaryo2LabelFile = '../../noduledetectordata/ilastikoutput3/s2/s2_labels.h5'
senaryo2BatchFile = '../../noduledetectordata/ilastikoutput3/s2/example_01.h5'
senaryo2BatchLabelFile = '../../noduledetectordata/ilastikoutput3/s2/labels_example_01.h5'
senaryo2 = {'all_features': senaryo2File, 'all_features_labels': senaryo2LabelFile, 'batch_features': senaryo2BatchFile, 'batch_features_labels': senaryo2BatchLabelFile}
senaryo2_train_sets = FeatureSet.readFromFile(senaryo2File, trainingDataSetNames, senaryo2LabelFile, 'labels')
senaryo2_batch_sets = FeatureSet.readFromFile(senaryo2BatchFile, trainingDataSetNames, senaryo2BatchLabelFile, 'labels')
senaryo2_sets = {'all_features_set': senaryo2_train_sets, 'batch_features_set': senaryo2_batch_sets, 'fileName': 'example01'}

senaryo3File = '../../noduledetectordata/ilastikoutput3/s3/s3.h5'
senaryo3LabelFile = '../../noduledetectordata/ilastikoutput3/s3/s3_labels.h5'
senaryo3BatchFile = '../../noduledetectordata/ilastikoutput3/s3/example_03.h5'
Example #5
#'skewness']
# this is vigra-computed
#trainingDataSetNames = ['count', 'regionCenter', 'regionRadii', 'histogram']

# this is MATLAB-computed
trainingDataSetNames = ['Volume', 'CentroidNorm', 'Perimeter', 'Complexity',
    'BoundingBox2Volume', 'BoundingBoxAspectRatio',
    'IntensityHist']  # commented-out candidates: 'BoundingBoxAspectRatio', 'IntensityMax', 'IntensityMean'

import numpy
import vigra  # provides vigra.learning.RandomForest, used below
from FeatureSet import FeatureSet  # import path assumed, as in the other examples

random_seed = 100
numpy.random.seed(random_seed)
print(numpy.random.rand(1, 1))  # sanity check that the seed took effect
repeatN = 100
acc = numpy.zeros([repeatN, 3])

# featureFile and labelFile are defined in an elided part of the original script
allFeatures = FeatureSet.readFromFile(featureFile, trainingDataSetNames, labelFile, 'labels')
coordinateFeatures = FeatureSet.readFromFile(featureFile, ['Centroid'], labelFile, 'labels')
predThreshold = 0.5
thresholdRange = numpy.linspace(0.1, 0.999, 10)
roc_mn_acc = numpy.zeros([len(thresholdRange), 3])
roc_std_acc = numpy.zeros([len(thresholdRange), 3])

partreeCount = 50
counter = 0
for predThreshold in thresholdRange:
    for ite in range(0, repeatN):

        trn, val = allFeatures.divideSetRandom(1, 1, True)
        #trn, val = allFeatures.divideSetByZ(coordinateFeatures.data[:, 2])
        rf = vigra.learning.RandomForest(treeCount=partreeCount)
        rf.learnRF(trn.data, trn.labels, randomSeed=ite * 10)
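The snippet cuts off right after training. Below is a hedged sketch of how each inner-loop iteration might score the held-out split: predictProbabilities() is part of vigra's RandomForest bindings, but treating column 1 as the positive class, binary {0, 1} labels, and recording into column 0 of `acc` are all assumptions about the original bookkeeping.

        # Hedged continuation of the inner loop: score the validation split.
        # val.data/val.labels mirror trn.data/trn.labels above; binary labels
        # in {0, 1} are assumed.
        probs = rf.predictProbabilities(val.data.astype(numpy.float32))
        pred = (probs[:, 1] >= predThreshold).astype(numpy.uint32)
        acc[ite, 0] = numpy.mean(pred == val.labels.ravel())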
Example #6
    'CloseMassRatio', 'IntensityHist', 'gaussianCoefficients', 'gaussianGOV',
    'Gradient', 'GradientOfMag'
]

senaryo1File = '../../noduledetectordata/ilastikoutput3/s1/s1.h5'
senaryo1LabelFile = '../../noduledetectordata/ilastikoutput3/s1/s1_labels.h5'
senaryo1BatchFile = '../../noduledetectordata/ilastikoutput3/s1/example_05.h5'
senaryo1BatchLabelFile = '../../noduledetectordata/ilastikoutput3/s1/labels_example_05.h5'
senaryo1 = {
    'all_features': senaryo1File,
    'all_features_labels': senaryo1LabelFile,
    'batch_features': senaryo1BatchFile,
    'batch_features_labels': senaryo1BatchLabelFile
}
senaryo1_train_sets = FeatureSet.readFromFile(senaryo1File,
                                              trainingDataSetNames,
                                              senaryo1LabelFile, 'labels')
senaryo1_batch_sets = FeatureSet.readFromFile(senaryo1BatchFile,
                                              trainingDataSetNames,
                                              senaryo1BatchLabelFile, 'labels')
senaryo1_sets = {
    'all_features_set': senaryo1_train_sets,
    'batch_features_set': senaryo1_batch_sets,
    'fileName': 'example05'
}

senaryo2File = '../../noduledetectordata/ilastikoutput3/s2/s2.h5'
senaryo2LabelFile = '../../noduledetectordata/ilastikoutput3/s2/s2_labels.h5'
senaryo2BatchFile = '../../noduledetectordata/ilastikoutput3/s2/example_01.h5'
senaryo2BatchLabelFile = '../../noduledetectordata/ilastikoutput3/s2/labels_example_01.h5'
senaryo2 = {
Example #7
if __name__ == '__main__':

    # test code
    from Bio import GenBank
    from Bio.SeqFeature import SeqFeature
    from FeatureSet import FeatureSet
    from GraphSet import GraphSet
    from Track import Track  # Track is used below; import assumed to follow the same local-module pattern
    from random import normalvariate

    parser = GenBank.FeatureParser()
    fhandle = open(
        '/data/genomes/Bacteria/Nanoarchaeum_equitans/NC_005213.gbk', 'r')
    genbank_entry = parser.parse(fhandle)
    fhandle.close()

    gdfs1 = FeatureSet(0, 'Nanoarchaeum equitans CDS - CDS')
    gdfs2 = FeatureSet(1, 'Nanoarchaeum equitans CDS - gene')
    for feature in genbank_entry.features:
        if feature.type == 'CDS':
            gdfs1.add_feature(feature)
        if feature.type == 'gene':
            gdfs2.add_feature(feature)

    gdt = Track()
    gdt.add_set(gdfs1)
    gdt.add_set(gdfs2)

    graphdata = []
    for pos in range(1, len(genbank_entry.seq), 1000):
        graphdata.append((pos, normalvariate(0.5, 0.1)))
    gdgs = GraphSet(2, 'test data')
Example #8
if __name__ == "__main__":

    # test code
    from Bio import GenBank
    from Bio.SeqFeature import SeqFeature
    from FeatureSet import FeatureSet
    from GraphSet import GraphSet
    from Track import Track  # Track is used below; import assumed as in Example #7
    from random import normalvariate

    parser = GenBank.FeatureParser()
    fhandle = open("/data/genomes/Bacteria/Nanoarchaeum_equitans/NC_005213.gbk", "r")
    genbank_entry = parser.parse(fhandle)
    fhandle.close()

    gdfs1 = FeatureSet(0, "Nanoarchaeum equitans CDS - CDS")
    gdfs2 = FeatureSet(1, "Nanoarchaeum equitans CDS - gene")
    for feature in genbank_entry.features:
        if feature.type == "CDS":
            gdfs1.add_feature(feature)
        if feature.type == "gene":
            gdfs2.add_feature(feature)

    gdt = Track()
    gdt.add_set(gdfs1)
    gdt.add_set(gdfs2)

    graphdata = []
    for pos in range(1, len(genbank_entry.seq), 1000):
        graphdata.append((pos, normalvariate(0.5, 0.1)))
    gdgs = GraphSet(2, "test data")
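Both test scripts stop right after creating the GraphSet. A hedged continuation is sketched below, assuming this standalone GenomeDiagram exposes the same new_graph() call as the version later merged into Biopython; that assumption is noted in the code as well.

    # Hedged continuation: attach the random data to the graph set and add it
    # to the track next to the two feature sets. new_graph() follows the
    # Biopython GenomeDiagram API; its presence here is an assumption.
    gdgs.new_graph(graphdata, "Random data", style="line")
    gdt.add_set(gdgs)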