Example No. 1
def func_to_optimize(parameters):
    '''objective function to be minimized; returns one error value per feature'''
    #get the data for this parameter set
    tvec, somavs = get_dataset(**parameters)

    allFeatures = GetFeatures(tvec, somavs)

    #container for the per-feature errors
    errors = []

    #error in each feature space: sum of absolute differences
    diff_feat0 = (features.feature0(**feature_params_0) -
                  allFeatures.feature0(**feature_params_0))
    diff_feat0 *= diff_feat0
    error_feat0 = np.sqrt(diff_feat0).sum()

    diff_feat1 = (features.feature1(**feature_params_1) -
                  allFeatures.feature1(**feature_params_1))
    diff_feat1 *= diff_feat1
    error_feat1 = np.sqrt(diff_feat1).sum()

    diff_feat5 = (features.feature5(**feature_params_5) -
                  allFeatures.feature5(**feature_params_5))
    diff_feat5 *= diff_feat5
    error_feat5 = np.sqrt(diff_feat5).sum()

    #prepare output: one error value per feature
    errors.append(error_feat0)
    errors.append(error_feat1)
    errors.append(error_feat5)

    return errors
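If a single scalar is needed (for example to use scipy.optimize), the per-feature errors can be collapsed into a weighted sum. A minimal sketch, assuming hypothetical parameter names (g_pas, cm), an equal weighting, and a Nelder-Mead search; nothing here except func_to_optimize itself comes from the snippet above.

import numpy as np
from scipy.optimize import minimize

def scalar_objective(x):
    # hypothetical mapping from a flat parameter vector to the keyword
    # arguments expected by get_dataset(); the names are placeholders
    parameters = dict(g_pas=x[0], cm=x[1])
    errors = func_to_optimize(parameters)
    # collapse the per-feature errors into a single number (equal weights)
    return float(np.sum(errors))

# made-up starting guess for the two placeholder parameters
result = minimize(scalar_objective, x0=np.array([1e-4, 1.0]),
                  method='Nelder-Mead')
print(result.x, result.fun)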
Example No. 2
def DoPCA(feature_names, nproc=1):
    model_file = 'pca_model_[{0}].pkl'.format(feature_names)

    if os.path.exists(model_file):
        log.info('Loading PCA model from {0}'.format(model_file))
        model = PCA()
        model.Load(model_file)
    else:
        log.info('Training PCA model')

        X, info = GetFeatures(feature_names, nproc=nproc)
        model = PCA.Train(X)
        model.Save(model_file)
        model.Save(model_file.replace('.pkl','.mat'))
    
    return model
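The PCA class here has its own Train/Load/Save interface. As an illustration of the same load-or-train caching pattern with standard tools, a sketch using scikit-learn and joblib (both assumptions, not used by the snippet above) might look like this:

import os
import joblib
from sklearn.decomposition import PCA as SkPCA

def do_pca_sklearn(X, model_file='pca_model.pkl', n_components=0.98):
    # reuse a previously fitted model if one is already cached on disk
    if os.path.exists(model_file):
        return joblib.load(model_file)
    # 0 < n_components < 1 keeps enough components to explain that
    # fraction of the variance
    model = SkPCA(n_components=n_components)
    model.fit(X)
    joblib.dump(model, model_file)
    return model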
Example No. 3
def GetEdges(coord_name, edge_thresh, nproc=1):
    check(coord_name in ['rdz', 'xyz'], 'unknown coord type')

    etag = '[{0}][{1}]'.format(coord_name, edge_thresh)
    edge_file = 'edges_{0}.pkl'.format(etag)

    if os.path.exists(edge_file):
        log.info('Loading edges from {0}'.format(edge_file))

        data = LoadPickles(edge_file)
        edges = data['edges']
        spec_ids = data['spec_ids']
        rdz = data['rdz']
        xyz = data['xyz']
    else:
        log.info('Extracting edges from raw data')

        # get the object locations; the feature array itself is not needed,
        # so drop it and keep only the metadata
        feature, info = GetFeatures('Spectrum', nproc=nproc)
        del feature
        spec_ids = info['specObjID']
        rdz = info['rdz']
        xyz = utils.ConvertToCartesian(rdz[:, 0], rdz[:, 1], rdz[:, 2])

        if coord_name == 'rdz':
            # search box: +/- 10/60 in RA/Dec and +/- edge_thresh along the
            # line-of-sight axis
            box = arr([[-10. / 60, -10. / 60, -edge_thresh],
                       [10. / 60, 10. / 60, edge_thresh]])
            # use comoving distance instead of redshift for the third axis
            rdd = rdz.copy()
            rdd[:, 2] = utils.ComovingDistance(rdz[:, 2])

            edges = NNSearch(rdd, [box + d for d in rdd], 'box', nproc, True)
        else:
            edges = NNSearch(xyz, (xyz, edge_thresh), 'sphere', nproc, True)

        SavePickle(edge_file, {
            'edges': edges,
            'spec_ids': spec_ids,
            'rdz': rdz,
            'xyz': xyz
        })

    log.info('{0} edges found'.format(edges.shape[1]))
    return (edges, spec_ids, rdz, xyz)
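Assuming the returned edges array holds index pairs with one edge per column (consistent with the edges.shape[1] count in the log line), a quick sanity check of the neighbour graph could look like this; the threshold value and nproc are arbitrary choices for the example.

import numpy as np

edges, spec_ids, rdz, xyz = GetEdges('xyz', edge_thresh=5.0, nproc=4)

# per-object degree: how many edges touch each object
degree = np.bincount(edges.ravel(), minlength=len(spec_ids))
print('mean degree: {0:.2f}, isolated objects: {1}'.format(
    degree.mean(), int((degree == 0).sum())))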
Example No. 4
    def GetLabeledObjects(self,
                          feature_names,
                          data_file='data_labeled_simbad_{0}.pkl',
                          min_class_size=5):
        '''Return only the objects that have SIMBAD labels.'''

        data_file = data_file.format(feature_names)
        if os.path.exists(data_file):
            log.info('Loading data from {0}'.format(data_file))
            data = LoadPickles(data_file)
        else:
            # get features
            feature, info = GetFeatures(feature_names)

            # get the simbad labels
            sb_specObjID, sb_dist, sb_objType, sb_objType_name, class_sizes = self.GetSIMBADLabels(
                min_class_size)

            # assign SIMBAD labels to our objects; keep only the rows with a
            # non-negative (i.e. matched) label
            labels = self.LabelData(info['specObjID'], sb_specObjID,
                                    sb_objType)
            lidx = find(labels >= 0)[0]
            info['labeled'] = lidx

            data = {
                'features': feature[lidx],
                'labels': labels[lidx],
                'class_names': sb_objType_name,
                'class_sizes': class_sizes,
                'info': info
            }
            SavePickle(data_file, data)

        return (data['features'], data['labels'], data['class_names'],
                data['class_sizes'], data['info'])
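A sketch of how the labeled subset might be consumed downstream, using a scikit-learn split and classifier; the instance name dataset, the feature choice and the classifier are illustrative assumptions, not part of the snippet above.

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# 'dataset' is a hypothetical instance of the class defining GetLabeledObjects
features, labels, class_names, class_sizes, info = \
    dataset.GetLabeledObjects('Spectrum')

X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.25, stratify=labels, random_state=0)

clf = RandomForestClassifier(n_estimators=200, random_state=0)
clf.fit(X_train, y_train)
print('held-out accuracy: {0:.3f}'.format(clf.score(X_test, y_test)))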
Example No. 5
    opts = getopt(sys.argv[1:], ['nproc=', 'feature=', 'scorer='])

    nproc = int(opts.get('--nproc', 1))
    feature_names = opts.get('--feature', 'Spectrum')
    scorer = opts.get('--scorer', 'pca:accum_err:0.98').lower()

    output_dir = './detection_point/'
    MakeDir(output_dir)

    tag = "[{0}][{1}]".format(feature_names, scorer)
    log.info('Run name: {0}'.format(tag))

    # the scorer spec has the form name:method:param (e.g. 'pca:accum_err:0.98')
    scorer, method, param = scorer.split(':')[:3]

    # get the feature
    feature, info = GetFeatures(feature_names, nproc=nproc)

    # scoring
    if scorer == 'pca':
        E = float(param)
        scores = PCAAnomalyScore(feature, feature, E, method)
    elif scorer == 'knn':
        K = int(param)
        scores = KNNAnomalyScore(feature, feature, K, method, nproc)
    elif scorer == 'mmf':
        rk = int(param)
        scores = MMFAnomalyScore(feature, feature, rk, method)[0]
    else:
        raise ValueError('unknown scorer: {0}'.format(scorer))

    info['scores'] = scores
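Once scores is attached to info, ranking and saving the most anomalous objects might proceed along these lines; the assumption that a higher score means more anomalous, the use of the specObjID field (as in the other examples), and the number of candidates kept are all illustrative.

import os
import numpy as np

# rank objects by score, most anomalous first (assuming higher = more anomalous)
order = np.argsort(scores)[::-1]
top = order[:100]

out_file = os.path.join(output_dir, 'top_anomalies_{0}.txt'.format(tag))
with open(out_file, 'w') as fh:
    for i in top:
        fh.write('{0}\t{1:.6f}\n'.format(info['specObjID'][i], scores[i]))
log.info('Wrote {0} candidates to {1}'.format(len(top), out_file))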
Example No. 6
    if os.path.isdir(filedest):
        #clear any previous output files
        for f in glob.glob(os.path.join(filedest, '*')):
            os.remove(f)
    else:
        os.mkdir(filedest)

#synchronize MPI processes before continuing
COMM.Barrier()

#stimulus current amplitudes
stim_amp = [0.2, -0.05]

#get the "data" (time vector and soma voltage traces) that we will fit against
tvec, somavs = get_dataset()

#object returning features of soma traces
features = GetFeatures(tvec, somavs)

#define some parameters for feature extraction
feature_params_0 = dict(xedges=np.arange(-100, 55, 5),
                        yedges=np.arange(-10, 25, 1),
                        threshold=1,
                        smooth=True)

feature_params_1 = dict(rows=[-1],
                        inds=np.r_[range(800, 1600),
                                   range(7200, 8000)])

feature_params_5 = dict()

#Set the range of each free parameter (degree of freedom); these ranges must be
#used by func_to_optimize() or by the functions it calls.
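The snippet ends before the ranges themselves appear; purely as a hypothetical illustration of what such a mapping could look like (parameter names and bounds invented here, not shown in the snippet):

#hypothetical (lower, upper) bounds; the real names and values are not shown
#in the snippet above
param_ranges = dict(
    g_pas=(1e-5, 1e-3),  #invented bounds for a passive leak conductance
    cm=(0.5, 2.0),       #invented bounds for membrane capacitance
)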