def ClassifyERPs (
		featurefiles,
		C = (10.0, 1.0, 0.1, 0.01),
		gamma = (1.0, 0.8, 0.6, 0.4, 0.2, 0.0),
		keepchan = (),
		rmchan = (),
		rmchan_usualsuspects = ('AUDL','AUDR','LAUD','RAUD','SYNC','VSYNC', 'VMRK', 'OLDREF'),
		rebias = True,
		save = False,
		select = False,
		description='ERPs to attended vs unattended events',
		maxcount=None,
		classes=None,
		folds=None,
		time_window=None,
		keeptrials=None,
	):
	"""
	Train and cross-validate a two-class classifier on trial data loaded from files.

	The data are loaded with DataFiles.load(featurefiles, catdim=0, ...) and must
	provide 'x' (indexed here as trials x channels x samples), 'y' (one label per
	trial), 'channels' (channel names) and 'fs'.  The two distinct label values are
	mapped to -1 and +1 before training.

	Arguments:
	    featurefiles:  passed to DataFiles.load; also used to derive the default
	                   output filename when `save` is not a string (the last entry
	                   is used if it is a tuple or list).
	    C:             value or sequence of values assigned to the classifier's
	                   hyper.C.  None leaves the classifier's own setting untouched.
	    gamma:         value or sequence of values tried as 'kernel.gamma' together
	                   with SigTools.symwhitenkern.  None selects SigTools.linkern.
	    keepchan:      channel names to keep (sequence, or whitespace-separated
	                   string; matched case-insensitively).  If empty or None, the
	                   channels in rmchan and rmchan_usualsuspects are removed instead.
	    rmchan, rmchan_usualsuspects:
	                   channel names to remove (same formats as keepchan); only
	                   consulted when keepchan is empty.
	    rebias:        if true, call the classifier's rebias() after cross-validation.
	    save:          filename for the output parameter file, or True to derive one
	                   from featurefiles (extension .gz/.pk stripped, '_weights.prm'
	                   appended).
	    select:        if true, additionally copy the saved file to this name
	                   (default 'ChosenWeights.prm'); relative names are placed in
	                   the directory of the saved file.  Implies saving.
	    description:   free text embedded in the stored description string.
	    maxcount:      passed to DataFiles.load.
	    classes:       if given, only trials whose label is in this collection are used.
	    folds:         passed to the classifier's cvtrain().  One of the strings
	                   'lofo', 'loro', 'leave one run out' or 'leave one file out'
	                   builds one fold per loaded file instead.
	    time_window:   if given, samples whose time (as computed by
	                   SigTools.samples2msec) lies outside [min, max] of this
	                   sequence are zeroed.
	    keeptrials:    if given, index applied to the trial dimension of x and y.

	Returns:
	    (u, c): u is the SigTools.stfac object built from the feature weights, with
	    channels, channels_used, fs, trchvar and description attached; c is the
	    trained classifier object.

	Raises:
	    ValueError: if 'x' or 'y' is missing from the loaded data, a requested class
	    is absent, the data do not contain exactly 2 classes, a class has fewer than
	    2 exemplars, or no channel is left after channel selection.
	"""
	# Python 2 calls the common string base class `basestring`; fall back to `str` elsewhere.
	try: string_types = basestring
	except NameError: string_types = str

	def plural(count):
		# suffix for "channel"/"channels" in the progress messages
		if count == 1: return ''
		return 's'

	file_inventory = []
	d = DataFiles.load(featurefiles, catdim=0, maxcount=maxcount, return_details=file_inventory)
	# 'leave on run out' is a historical misspelling that is still accepted for backward compatibility
	per_file_keywords = ('lofo', 'loro', 'leave one run out', 'leave on run out', 'leave one file out')
	if isinstance(folds, string_types) and folds.lower() in per_file_keywords:
		# NOTE(review): these folds index trials as loaded; if keeptrials or classes
		# later drop trials the indices may no longer line up - confirm against cvtrain.
		n, folds = 0, []
		for each in file_inventory:
			neach = each[1]['x']
			folds.append(list(range(n, n+neach)))
			n += neach

	if 'x' not in d: raise ValueError("found no trial data - no 'x' variable - in the specified files")
	if 'y' not in d: raise ValueError("found no trial labels - no 'y' variable - in the specified files")

	x = d['x']
	y = numpy.array(d['y'].flat)
	# `is not None` rather than `!= None`: these arguments may be numpy arrays,
	# for which `!=` is elementwise and has no single truth value.
	if keeptrials is not None:
		keeptrials = numpy.asarray(keeptrials)
		x = x[keeptrials, :, :]
		y = y[keeptrials]

	if time_window is not None:
		fs = d['fs']
		t = SigTools.samples2msec(numpy.arange(x.shape[2]), fs)
		x[:, :, t<min(time_window)] = 0
		x[:, :, t>max(time_window)] = 0

	if classes is not None:
		for cl in classes:
			if cl not in y: raise ValueError("class %s is not in the dataset" % str(cl))
		mask = numpy.array([yi in classes for yi in y])
		y = y[mask]
		x = x[mask]
		discarded = int(numpy.logical_not(mask).sum())
		if discarded: print("discarding %d trials that are outside the requested classes %s"%(discarded,str(classes)))

	n = len(y)
	uy = numpy.unique(y)
	if uy.size != 2: raise ValueError("expected 2 classes in dataset, found %d : %s" % (uy.size, str(uy)))
	nyi_min = 2
	for uyi in uy:
		nyi = int((y == uyi).sum())
		if nyi < nyi_min: raise ValueError('only %d exemplars of class %s - need at least %d' % (nyi,str(uyi),nyi_min))

	# map the smaller label to -1 and the larger to +1
	y = numpy.sign(y - uy.mean())

	cov,trchvar = SigTools.spcov(x=x, y=y, balance=False, return_trchvar=True) # NB: symwhitenkern would not be able to balance

	starttime = time.time()

	chlower = [ch.lower() for ch in d['channels']]
	if keepchan is None or len(keepchan) == 0:
		if rmchan is None: rmchan = ()
		if rmchan_usualsuspects is None: rmchan_usualsuspects = ()
		if isinstance(rmchan, string_types): rmchan = rmchan.split()
		if isinstance(rmchan_usualsuspects, string_types): rmchan_usualsuspects = rmchan_usualsuspects.split()
		allrmchan = [ch.lower() for ch in list(rmchan)+list(rmchan_usualsuspects)]
		unwanted = numpy.array([ch in allrmchan for ch in chlower], dtype=bool)
		# only explicitly requested removals are reported as missing, not the usual suspects
		notfound = [ch for ch in rmchan if ch.lower() not in chlower]
	else:
		if isinstance(keepchan, string_types): keepchan = keepchan.split()
		lowerkeepchan = [ch.lower() for ch in keepchan]
		unwanted = numpy.array([ch not in lowerkeepchan for ch in chlower], dtype=bool)
		notfound = [ch for ch in keepchan if ch.lower() not in chlower]

	wanted = numpy.logical_not(unwanted)
	nwanted = int(wanted.sum())
	print(' ')
	if len(notfound): print("WARNING: could not find channel%s %s\n" % (plural(len(notfound)), ', '.join(notfound)))
	removed = [ch for removing,ch in zip(unwanted, d['channels']) if removing]
	if len(removed): print("removed %d channel%s (%s)" % (len(removed), plural(len(removed)), ', '.join(removed)))
	print("classification will be based on %d channel%s" % (nwanted, plural(nwanted)))
	print("%d negatives + %d positives = %d exemplars" % (int((y<0).sum()), int((y>0).sum()), n))
	print(' ')
	if nwanted == 0: raise ValueError("no channels left for classification after channel selection")

	# Silence the unwanted channels in the data and the covariance, then give every
	# zero diagonal entry the mean variance of the wanted channels.
	x[:, unwanted, :] = 0
	cov[:, unwanted] = 0
	cov[unwanted, :] = 0
	nu = numpy.asarray(cov).diagonal()[wanted].mean()
	for i in range(len(cov)):
		if cov[i,i] == 0: cov[i,i] = nu

	if not isinstance(C, (tuple,list,numpy.ndarray,type(None))): C = [C]
	if not isinstance(gamma, (tuple,list,numpy.ndarray,type(None))): gamma = [gamma]

	c = SigTools.klr2class(lossfunc=SigTools.balanced_loss, relcost='balance')
	c.varyhyper({})
	# The argument C (not the classifier object c) decides whether hyper.C is overridden.
	if C is not None: c.hyper.C=list(C)
	if gamma is None: c.hyper.kernel.func = SigTools.linkern
	else: c.varyhyper({'kernel.func':SigTools.symwhitenkern, 'kernel.cov':[cov], 'kernel.gamma':list(gamma)})
	c.cvtrain(x=x, y=y, folds=folds)
	if rebias: c.rebias()
	c.calibrate()

	chosen = c.cv.chosen.hyper
	if gamma is None:
		Ps = None
		Gp = c.featureweight(x=x)
	else:
		Ps = SigTools.svd(SigTools.shrinkcov(cov, copy=True, gamma=chosen.kernel.gamma)).isqrtm
		xp = SigTools.spfilt(x, Ps.H, copy=True)
		Gp = c.featureweight(x=xp)

	u = SigTools.stfac(Gp, Ps)
	u.channels = d['channels']
	u.channels_used = wanted
	u.fs = d['fs']
	u.trchvar = trchvar
	# best effort: keep the plain channel list if the conversion fails
	try: u.channels = SigTools.ChannelSet(u.channels)
	except Exception: print('WARNING: failed to convert channels to ChannelSet')

	elapsed = time.time() - starttime
	# round first, then split, so that the seconds field can never read 60
	minutes, seconds = divmod(int(round(elapsed)), 60)
	print('%d min %d sec' % (minutes, seconds))
	datestamp = time.strftime('%Y-%m-%d %H:%M:%S')
	csummary = '%s (%s) trained on %d (CV %s = %.3f) at %s' % (
		c.__class__.__name__,
		SigTools.experiment()._shortdesc(chosen),
		sum(c.input.istrain),
		c.loss.func.__name__,
		c.loss.train,
		datestamp,
	)
	description = 'binary classification of %s: %s' % (description, csummary)
	u.description = description

	if save or select:
		if not isinstance(save, string_types):
			# derive the output name from the (last) feature file
			save = featurefiles
			if isinstance(save, (tuple,list)): save = save[-1]
			if save.lower().endswith('.gz'): save = save[:-3]
			if save.lower().endswith('.pk'): save = save[:-3]
			save = save + '_weights.prm'
		print("\nsaving %s\n" % save)
		Parameters.Param(u.G.A, Name='ERPClassifierWeights', Section='PythonSig', Subsection='Epoch', Comment=csummary).write_to(save)
		Parameters.Param(c.model.bias, Name='ERPClassifierBias', Section='PythonSig', Subsection='Epoch', Comment=csummary).append_to(save)
		Parameters.Param(description, Name='SignalProcessingDescription', Section='PythonSig').append_to(save)
		if select:
			if not isinstance(select, string_types): select = 'ChosenWeights.prm'
			if not os.path.isabs(select): select = os.path.join(os.path.split(save)[0], select)
			print("saving %s\n" % select)
			import shutil; shutil.copyfile(save, select)

	print(description)
	return u,c
# Example #2
# 0
def ClassifyERPs (
		featurefile,
		C = (10.0, 1.0, 0.1, 0.01),
		gamma = (1.0, 0.8, 0.6, 0.4, 0.2, 0.0),
		rmchan = (),
		rebias = True,
		save = False,
		description='ERPs to attended vs unattended events',
		maxcount=None,
	):
	"""
	Train and cross-validate a two-class classifier on trial data loaded from a file.

	The data are loaded with DataFiles.load(featurefile, catdim=0, maxcount=maxcount)
	and must provide 'x' (indexed here as trials x channels x samples), 'y' (one
	label per trial), 'channels' (channel names) and 'fs'.  The two distinct label
	values are mapped to -1 and +1 before training.

	Arguments:
	    featurefile:  passed to DataFiles.load; also used to derive the default
	                  output filename when `save` is not a string (the last entry is
	                  used if it is a tuple or list).
	    C:            value or sequence of values assigned to the classifier's
	                  hyper.C.  None leaves the classifier's own setting untouched.
	    gamma:        value or sequence of values tried as 'kernel.gamma' together
	                  with SigTools.symwhitenkern.  None selects SigTools.linkern.
	    rmchan:       channel names to remove (sequence, or whitespace-separated
	                  string; matched case-insensitively).  A fixed list of auxiliary
	                  channels (audl, audr, laud, raud, sync, vsync, vmrk, oldref) is
	                  always removed as well.
	    rebias:       if true, call the classifier's rebias() after cross-validation.
	    save:         filename for the output parameter file, or True to derive one
	                  from featurefile (extension .gz/.pk stripped, '_weights.prm'
	                  appended).
	    description:  free text embedded in the stored description string.
	    maxcount:     passed to DataFiles.load.

	Returns:
	    (u, c): u is the SigTools.stfac object built from the feature weights, with
	    channels, channels_used, fs, trchvar and description attached; c is the
	    trained classifier object.

	Raises:
	    ValueError: if the data do not contain exactly 2 classes, or no channel is
	    left after channel removal.
	"""
	# Python 2 calls the common string base class `basestring`; fall back to `str` elsewhere.
	try: string_types = basestring
	except NameError: string_types = str

	def plural(count):
		# suffix for "channel"/"channels" in the progress messages
		if count == 1: return ''
		return 's'

	d = DataFiles.load(featurefile, catdim=0, maxcount=maxcount)

	x = d['x']
	y = numpy.array(d['y'].flat)
	n = len(y)
	uy = numpy.unique(y)
	if uy.size != 2: raise ValueError("expected 2 classes in dataset, found %d" % uy.size)
	# map the smaller label to -1 and the larger to +1
	y = numpy.sign(y - uy.mean())

	cov,trchvar = SigTools.spcov(x=x, y=y, balance=False, return_trchvar=True) # NB: symwhitenkern would not be able to balance

	starttime = time.time()

	if rmchan is None: rmchan = ()
	if isinstance(rmchan, string_types): rmchan = rmchan.split()
	allrmchan = tuple([ch.lower() for ch in rmchan]) + ('audl','audr','laud','raud','sync','vsync', 'vmrk', 'oldref')
	chlower = [ch.lower() for ch in d['channels']]
	unwanted = numpy.array([ch in allrmchan for ch in chlower], dtype=bool)
	wanted = numpy.logical_not(unwanted)
	nwanted = int(wanted.sum())
	# only explicitly requested removals are reported as missing, not the built-in list
	notfound = [ch for ch in rmchan if ch.lower() not in chlower]
	print(' ')
	if len(notfound): print("WARNING: could not find channel%s %s\n" % (plural(len(notfound)), ', '.join(notfound)))
	removed = [ch for removing,ch in zip(unwanted, d['channels']) if removing]
	if len(removed): print("removed %d channel%s (%s)" % (len(removed), plural(len(removed)), ', '.join(removed)))
	print("classification will be based on %d channel%s" % (nwanted, plural(nwanted)))
	print("%d negatives + %d positives = %d exemplars" % (int((y<0).sum()), int((y>0).sum()), n))
	print(' ')
	if nwanted == 0: raise ValueError("no channels left for classification after channel removal")

	# Silence the unwanted channels in the data and the covariance, then give every
	# zero diagonal entry the mean variance of the wanted channels.
	x[:, unwanted, :] = 0
	cov[:, unwanted] = 0
	cov[unwanted, :] = 0
	nu = numpy.asarray(cov).diagonal()[wanted].mean()
	for i in range(len(cov)):
		if cov[i,i] == 0: cov[i,i] = nu

	if not isinstance(C, (tuple,list,numpy.ndarray,type(None))): C = [C]
	if not isinstance(gamma, (tuple,list,numpy.ndarray,type(None))): gamma = [gamma]

	c = SigTools.klr2class(lossfunc=SigTools.balanced_loss, relcost='balance').varyhyper({})
	# The argument C (not the classifier object c) decides whether hyper.C is overridden.
	if C is not None: c.hyper.C=list(C)
	# `is None` rather than `== None`: gamma may be a numpy array, for which `==`
	# is elementwise and has no single truth value.
	if gamma is None: c.hyper.kernel.func = SigTools.linkern
	else: c.varyhyper({'kernel.func':SigTools.symwhitenkern, 'kernel.cov':[cov], 'kernel.gamma':list(gamma)})
	c.cvtrain(x=x,y=y)
	if rebias: c.rebias()
	c.calibrate()

	chosen = c.cv.chosen.hyper
	if gamma is None:
		Ps = None
		Gp = c.featureweight(x=x)
	else:
		Ps = SigTools.svd(SigTools.shrinkcov(cov, copy=True, gamma=chosen.kernel.gamma)).isqrtm
		xp = SigTools.spfilt(x, Ps.H, copy=True)
		Gp = c.featureweight(x=xp)

	u = SigTools.stfac(Gp, Ps)
	u.channels = d['channels']
	u.channels_used = wanted
	u.fs = d['fs']
	u.trchvar = trchvar

	elapsed = time.time() - starttime
	# round first, then split, so that the seconds field can never read 60
	minutes, seconds = divmod(int(round(elapsed)), 60)
	print('%d min %d sec' % (minutes, seconds))
	datestamp = time.strftime('%Y-%m-%d %H:%M:%S')
	csummary = '%s (%s) trained on %d (CV %s = %.3f) at %s' % (
		c.__class__.__name__,
		SigTools.experiment()._shortdesc(chosen),
		sum(c.input.istrain),
		c.loss.func.__name__,
		c.loss.train,
		datestamp,
	)
	description = 'binary classification of %s: %s' % (description, csummary)
	u.description = description

	if save:
		if not isinstance(save, string_types):
			# derive the output name from the (last) feature file
			save = featurefile
			if isinstance(save, (tuple,list)): save = save[-1]
			if save.lower().endswith('.gz'): save = save[:-3]
			if save.lower().endswith('.pk'): save = save[:-3]
			save = save + '_weights.prm'
		print("\nsaving %s\n" % save)
		Parameters.Param(u.G.A, name='ERPClassifierWeights', tab='PythonSig', section='Epoch', comment=csummary).writeto(save)
		Parameters.Param(c.model.bias, name='ERPClassifierBias', tab='PythonSig', section='Epoch', comment=csummary).appendto(save)
		Parameters.Param(description, name='SignalProcessingDescription', tab='PythonSig').appendto(save)
	return u,c
# Example #3
# 0
def ClassifyERPs(
    featurefiles,
    C=(10.0, 1.0, 0.1, 0.01),
    gamma=(1.0, 0.8, 0.6, 0.4, 0.2, 0.0),
    keepchan=(),
    rmchan=(),
    rmchan_usualsuspects=('AUDL', 'AUDR', 'LAUD', 'RAUD', 'SYNC', 'VSYNC',
                          'VMRK', 'OLDREF'),
    rebias=True,
    save=False,
    select=False,
    description='ERPs to attended vs unattended events',
    maxcount=None,
    classes=None,
    folds=None,
    time_window=None,
    keeptrials=None,
):
    """
    Train and cross-validate a two-class classifier on trial data loaded from files.

    The data are loaded with DataFiles.load(featurefiles, catdim=0, ...) and must
    provide 'x' (indexed here as trials x channels x samples), 'y' (one label per
    trial), 'channels' (channel names) and 'fs'.  The two distinct label values are
    mapped to -1 and +1 before training.

    Arguments:
        featurefiles:  passed to DataFiles.load; also used to derive the default
                       output filename when `save` is not a string (the last entry
                       is used if it is a tuple or list).
        C:             value or sequence of values assigned to the classifier's
                       hyper.C.  None leaves the classifier's own setting untouched.
        gamma:         value or sequence of values tried as 'kernel.gamma' together
                       with SigTools.symwhitenkern.  None selects SigTools.linkern.
        keepchan:      channel names to keep (sequence, or whitespace-separated
                       string; matched case-insensitively).  If empty or None, the
                       channels in rmchan and rmchan_usualsuspects are removed
                       instead.
        rmchan, rmchan_usualsuspects:
                       channel names to remove (same formats as keepchan); only
                       consulted when keepchan is empty.
        rebias:        if true, call the classifier's rebias() after
                       cross-validation.
        save:          filename for the output parameter file, or True to derive
                       one from featurefiles (extension .gz/.pk stripped,
                       '_weights.prm' appended).
        select:        if true, additionally copy the saved file to this name
                       (default 'ChosenWeights.prm'); relative names are placed in
                       the directory of the saved file.  Implies saving.
        description:   free text embedded in the stored description string.
        maxcount:      passed to DataFiles.load.
        classes:       if given, only trials whose label is in this collection are
                       used.
        folds:         passed to the classifier's cvtrain().  One of the strings
                       'lofo', 'loro', 'leave one run out' or 'leave one file out'
                       builds one fold per loaded file instead.
        time_window:   if given, samples whose time (as computed by
                       SigTools.samples2msec) lies outside [min, max] of this
                       sequence are zeroed.
        keeptrials:    if given, index applied to the trial dimension of x and y.

    Returns:
        (u, c): u is the SigTools.stfac object built from the feature weights, with
        channels, channels_used, fs, trchvar and description attached; c is the
        trained classifier object.

    Raises:
        ValueError: if 'x' or 'y' is missing from the loaded data, a requested class
        is absent, the data do not contain exactly 2 classes, a class has fewer than
        2 exemplars, or no channel is left after channel selection.
    """
    # Python 2 calls the common string base class `basestring`; fall back to
    # `str` elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    def plural(count):
        # suffix for "channel"/"channels" in the progress messages
        if count == 1:
            return ''
        return 's'

    file_inventory = []
    d = DataFiles.load(featurefiles,
                       catdim=0,
                       maxcount=maxcount,
                       return_details=file_inventory)
    # 'leave on run out' is a historical misspelling that is still accepted for
    # backward compatibility
    per_file_keywords = ('lofo', 'loro', 'leave one run out',
                         'leave on run out', 'leave one file out')
    if isinstance(folds, string_types) and folds.lower() in per_file_keywords:
        # NOTE(review): these folds index trials as loaded; if keeptrials or
        # classes later drop trials the indices may no longer line up - confirm
        # against cvtrain.
        n, folds = 0, []
        for each in file_inventory:
            neach = each[1]['x']
            folds.append(list(range(n, n + neach)))
            n += neach

    if 'x' not in d:
        raise ValueError(
            "found no trial data - no 'x' variable - in the specified files")
    if 'y' not in d:
        raise ValueError(
            "found no trial labels - no 'y' variable - in the specified files")

    x = d['x']
    y = numpy.array(d['y'].flat)
    # `is not None` rather than `!= None`: these arguments may be numpy arrays,
    # for which `!=` is elementwise and has no single truth value.
    if keeptrials is not None:
        keeptrials = numpy.asarray(keeptrials)
        x = x[keeptrials, :, :]
        y = y[keeptrials]

    if time_window is not None:
        fs = d['fs']
        t = SigTools.samples2msec(numpy.arange(x.shape[2]), fs)
        x[:, :, t < min(time_window)] = 0
        x[:, :, t > max(time_window)] = 0

    if classes is not None:
        for cl in classes:
            if cl not in y:
                raise ValueError("class %s is not in the dataset" % str(cl))
        mask = numpy.array([yi in classes for yi in y])
        y = y[mask]
        x = x[mask]
        discarded = int(numpy.logical_not(mask).sum())
        if discarded:
            print("discarding %d trials that are outside the requested classes %s" % (
                discarded, str(classes)))

    n = len(y)
    uy = numpy.unique(y)
    if uy.size != 2:
        raise ValueError("expected 2 classes in dataset, found %d : %s" %
                         (uy.size, str(uy)))
    nyi_min = 2
    for uyi in uy:
        nyi = int((y == uyi).sum())
        if nyi < nyi_min:
            raise ValueError(
                'only %d exemplars of class %s - need at least %d' %
                (nyi, str(uyi), nyi_min))

    # map the smaller label to -1 and the larger to +1
    y = numpy.sign(y - uy.mean())

    cov, trchvar = SigTools.spcov(
        x=x, y=y, balance=False,
        return_trchvar=True)  # NB: symwhitenkern would not be able to balance

    starttime = time.time()

    chlower = [ch.lower() for ch in d['channels']]
    if keepchan is None or len(keepchan) == 0:
        if rmchan is None:
            rmchan = ()
        if rmchan_usualsuspects is None:
            rmchan_usualsuspects = ()
        if isinstance(rmchan, string_types):
            rmchan = rmchan.split()
        if isinstance(rmchan_usualsuspects, string_types):
            rmchan_usualsuspects = rmchan_usualsuspects.split()
        allrmchan = [
            ch.lower() for ch in list(rmchan) + list(rmchan_usualsuspects)
        ]
        unwanted = numpy.array([ch in allrmchan for ch in chlower], dtype=bool)
        # only explicitly requested removals are reported as missing, not the
        # usual suspects
        notfound = [ch for ch in rmchan if ch.lower() not in chlower]
    else:
        if isinstance(keepchan, string_types):
            keepchan = keepchan.split()
        lowerkeepchan = [ch.lower() for ch in keepchan]
        unwanted = numpy.array([ch not in lowerkeepchan for ch in chlower],
                               dtype=bool)
        notfound = [ch for ch in keepchan if ch.lower() not in chlower]

    wanted = numpy.logical_not(unwanted)
    nwanted = int(wanted.sum())
    print(' ')
    if len(notfound):
        print("WARNING: could not find channel%s %s\n" %
              (plural(len(notfound)), ', '.join(notfound)))
    removed = [ch for removing, ch in zip(unwanted, d['channels']) if removing]
    if len(removed):
        print("removed %d channel%s (%s)" %
              (len(removed), plural(len(removed)), ', '.join(removed)))
    print("classification will be based on %d channel%s" %
          (nwanted, plural(nwanted)))
    print("%d negatives + %d positives = %d exemplars" %
          (int((y < 0).sum()), int((y > 0).sum()), n))
    print(' ')
    if nwanted == 0:
        raise ValueError(
            "no channels left for classification after channel selection")

    # Silence the unwanted channels in the data and the covariance, then give
    # every zero diagonal entry the mean variance of the wanted channels.
    x[:, unwanted, :] = 0
    cov[:, unwanted] = 0
    cov[unwanted, :] = 0
    nu = numpy.asarray(cov).diagonal()[wanted].mean()
    for i in range(len(cov)):
        if cov[i, i] == 0:
            cov[i, i] = nu

    if not isinstance(C, (tuple, list, numpy.ndarray, type(None))):
        C = [C]
    if not isinstance(gamma, (tuple, list, numpy.ndarray, type(None))):
        gamma = [gamma]

    c = SigTools.klr2class(lossfunc=SigTools.balanced_loss, relcost='balance')
    c.varyhyper({})
    # The argument C (not the classifier object c) decides whether hyper.C is
    # overridden.
    if C is not None:
        c.hyper.C = list(C)
    if gamma is None:
        c.hyper.kernel.func = SigTools.linkern
    else:
        c.varyhyper({
            'kernel.func': SigTools.symwhitenkern,
            'kernel.cov': [cov],
            'kernel.gamma': list(gamma)
        })
    c.cvtrain(x=x, y=y, folds=folds)
    if rebias:
        c.rebias()
    c.calibrate()

    chosen = c.cv.chosen.hyper
    if gamma is None:
        Ps = None
        Gp = c.featureweight(x=x)
    else:
        Ps = SigTools.svd(
            SigTools.shrinkcov(cov, copy=True,
                               gamma=chosen.kernel.gamma)).isqrtm
        xp = SigTools.spfilt(x, Ps.H, copy=True)
        Gp = c.featureweight(x=xp)

    u = SigTools.stfac(Gp, Ps)
    u.channels = d['channels']
    u.channels_used = wanted
    u.fs = d['fs']
    u.trchvar = trchvar
    # best effort: keep the plain channel list if the conversion fails
    try:
        u.channels = SigTools.ChannelSet(u.channels)
    except Exception:
        print('WARNING: failed to convert channels to ChannelSet')

    elapsed = time.time() - starttime
    # round first, then split, so that the seconds field can never read 60
    minutes, seconds = divmod(int(round(elapsed)), 60)
    print('%d min %d sec' % (minutes, seconds))
    datestamp = time.strftime('%Y-%m-%d %H:%M:%S')
    csummary = '%s (%s) trained on %d (CV %s = %.3f) at %s' % (
        c.__class__.__name__,
        SigTools.experiment()._shortdesc(chosen),
        sum(c.input.istrain),
        c.loss.func.__name__,
        c.loss.train,
        datestamp,
    )
    description = 'binary classification of %s: %s' % (description, csummary)
    u.description = description

    if save or select:
        if not isinstance(save, string_types):
            # derive the output name from the (last) feature file
            save = featurefiles
            if isinstance(save, (tuple, list)):
                save = save[-1]
            if save.lower().endswith('.gz'):
                save = save[:-3]
            if save.lower().endswith('.pk'):
                save = save[:-3]
            save = save + '_weights.prm'
        print("\nsaving %s\n" % save)
        Parameters.Param(u.G.A,
                         Name='ERPClassifierWeights',
                         Section='PythonSig',
                         Subsection='Epoch',
                         Comment=csummary).write_to(save)
        Parameters.Param(c.model.bias,
                         Name='ERPClassifierBias',
                         Section='PythonSig',
                         Subsection='Epoch',
                         Comment=csummary).append_to(save)
        Parameters.Param(description,
                         Name='SignalProcessingDescription',
                         Section='PythonSig').append_to(save)
        if select:
            if not isinstance(select, string_types):
                select = 'ChosenWeights.prm'
            if not os.path.isabs(select):
                select = os.path.join(os.path.split(save)[0], select)
            print("saving %s\n" % select)
            import shutil
            shutil.copyfile(save, select)

    print(description)
    return u, c