예제 #1
0
def run(dataDefFile, var1, var2, datapoints):
    """Plot 3-D manifolds built from the series in a data file.

    Args:
        dataDefFile: Path ending in ``.py`` (a data definition file, handed to
            ``synthDataGen.run`` to generate the data) or ``.csv`` (a data
            file that is read directly).  Any other extension prints an error
            message and the function returns without plotting.
        var1: Name of the first series for shadow-manifold mode, or ``None``
            to plot consecutive triples of series instead.
        var2: Name of the second series; only read when ``var1`` is not None.
        datapoints: Forwarded to ``synthDataGen.run`` and used as the
            ``limit`` of the ``getData.DataReader``.

    Returns:
        None.  The figure is shown with ``plt.show()``.
    """
    tau = TAU
    print('Generating data using: ', dataDefFile)
    fileExt = dataDefFile.split('.')[-1]
    if fileExt == 'py':
        # Data Definition File
        outFile = synthDataGen.run(dataDefFile, datapoints)
    elif fileExt == 'csv':
        # Data file
        outFile = dataDefFile
    else:
        print('*** Invalid file type = ', fileExt)
        return
    d = getData.DataReader(outFile, limit=datapoints)
    fig = plt.figure()
    # projection='3d' makes `ax` a 3D-aware axis instance.
    ax = fig.add_axes([.1, .1, .8, .8], projection='3d')

    def _load(name):
        # Fetch one series, standardizing it when the module-level
        # `standard` flag is set.
        series = d.getSeries(name)
        if standard:
            series = standardize.standardize(series)
        return series

    def _delay_embed(series):
        # Lagged coordinates (s[t], s[t + tau], s[t + 2*tau]).
        return series[:-2*tau], series[tau:-tau], series[2*tau:]

    if var1 is None:
        # No variables specified: build a master manifold out of three
        # series at a time (at most four triples, in sorted name order).
        seriesNames = d.getSeriesNames()
        seriesNames.sort()
        print('Vars = ', seriesNames)
        for i in range(4):
            if len(seriesNames) < i*3 + 3:
                break
            triple = [d.getSeries(name) for name in seriesNames[i*3:i*3 + 3]]
            if standard:
                triple = [standardize.standardize(s) for s in triple]
            # No explicit color is passed, so matplotlib's default color
            # cycle distinguishes the curves.
            ax.plot(*triple)
    else:
        # Build one shadow manifold from each of the two specified series.
        X1, Y1, Z1 = _delay_embed(_load(var1))
        X2, Y2, Z2 = _delay_embed(_load(var2))
        ax.plot(X1, Y1, Z1)
        ax.plot(X2, Y2, Z2, 'r')

    plt.show()
def similarity_chk(inputFile1, inputFile2):
    """Return the bigram resemblance score of two input files.

    Each input is passed through ``standardize.standardize``; the value it
    returns is opened as a UTF-8 file (undecodable bytes are ignored).  The
    score is ``len(unique shared 2-grams) / len(unique 2-grams of both)``.
    Intermediate counts are printed to stdout.

    Args:
        inputFile1: First input, as accepted by ``standardize.standardize``.
        inputFile2: Second input, as accepted by ``standardize.standardize``.

    Returns:
        The resemblance score as a float.

    Raises:
        ZeroDivisionError: If the combined 2-gram list is empty.
    """
    def _read_standardized(inputFile):
        # Close the handle deterministically instead of leaking it.
        std_path = standardize.standardize(inputFile)
        with codecs.open(std_path, encoding='utf-8', errors='ignore') as handle:
            return handle.read()

    file1 = _read_standardized(inputFile1)
    file2 = _read_standardized(inputFile2)
    ngram1 = ngrams(file1, 2)  # e.g. [['a', 'b'], ['b', 'c'], ['c', 'd']]
    ngram2 = ngrams(file2, 2)
    l1 = len(ngram1)
    l2 = len(ngram2)
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("l1")
    print(l1)
    print("l2")
    print(l2)
    interList = uniq([gram for gram in ngram1 if gram in ngram2])
    unionList = uniq(ngram1 + ngram2)
    similar = len(interList)
    total = len(unionList)
    print("similar")
    print(similar)
    print("total")
    print(total)
    threshold = float(similar) / total
    print("resemblence score ")
    print(threshold)
    return threshold
예제 #3
0
파일: CCM.py 프로젝트: RogerDev/Causality
 def __init__(self, s1, s2, tau=2, dim=4):
     """Store two standardized series and build their shadow manifolds.

     Args:
         s1: First series; stored standardized as ``self.s1``.
         s2: Second series; stored standardized as ``self.s2``.
         tau: Lag used for the embedding (default 2).
         dim: Embedding dimension (default 4).
     """
     self.s1 = standardize.standardize(s1)
     self.s2 = standardize.standardize(s2)
     self.tau = tau
     self.dim = dim
     self.pointsToTest = []
     # Length of s1 minus the (dim - 1) * tau samples consumed by the lags;
     # presumably the number of embedded points -- confirm against
     # _generateShadowManifolds.
     self.sLen = len(s1) - (self.dim - 1) * self.tau
     self._generateShadowManifolds()
예제 #4
0
 def translate(self):
     """Run the translation passes over this object and return ``self.origin``.

     ``self.origin`` is printed before and after ``standardize(self)``.  Then,
     for each TeX command found in ``self.set_of_tex``, its handler is called
     with ``self``, followed by ``mul_integer_symbol`` and ``convert_integer``.
     """
     # The order is very important and still needs more consideration.
     handlers_in_order = (
         ("\\frac", frac),
         ("\\int", integrate),
         ("\\partial", partial),
     )
     print(self.origin)
     standardize(self)
     print(self.origin)
     for tex_command, handler in handlers_in_order:
         if tex_command in self.set_of_tex:
             handler(self)
     mul_integer_symbol(self)
     convert_integer(self)
     return self.origin
예제 #5
0
def get_weight_vector(feature_matrix, output, lambda_reg, p):
    """
    Fit a weight vector for p-norm regularized regression.

    feature_matrix: an n x m 2-D numpy array where n is the number of samples
                    and m the feature size.
    output: an n x 1 numpy array representing the outputs for the n samples
    lambda_reg: regularization parameter
    p: p-norm for the regularized regression; must satisfy 1 <= p <= 2

    Return: a tuple (w, check).  When p is outside [1, 2], w is an empty list
            and check is False.  Otherwise check is True and w is the weight
            vector returned by L2 (p == 2), L1 (p == 1) or pNorm (any other
            p), each fitted on a standardized deep copy of feature_matrix.
    """
    if p < 1 or p > 2:
        return [], False

    # Standardize a copy so the caller's feature matrix is left untouched.
    x_train = standardize(copy.deepcopy(feature_matrix))
    # Fit against the `output` argument rather than a module-level y_train.
    if p == 2:
        w = L2(x_train, output, lambda_reg)
    elif p == 1:
        w = L1(x_train, output, lambda_reg)
    else:
        w = pNorm(x_train, output, lambda_reg, p)
    return w, True
예제 #6
0
 def translate(self):
     """Translate this object and return the resulting ``self.origin``.

     Prints ``self.origin`` before and after ``standardize(self)``, calls the
     handler of every TeX command present in ``self.set_of_tex``, then calls
     ``mul_integer_symbol`` and ``convert_integer``.
     """
     dispatch = {
         '\\frac': frac,
         '\\int': integrate,
         '\\partial': partial,
     }
     print(self.origin)
     standardize(self)
     print(self.origin)
     # The order is very important and still needs more consideration.
     for tex_name in ('\\frac', '\\int', '\\partial'):
         if tex_name not in self.set_of_tex:
             continue
         dispatch[tex_name](self)
     mul_integer_symbol(self)
     convert_integer(self)
     return self.origin
예제 #7
0
def direction(rvA, rvB):
    """Estimate the causal direction between variables A and B.

    Uses the pairwise LiNGAM algorithm (hyperbolic tangent variant) on the
    standardized inputs and returns a number R.  A positive R indicates that
    the causal path runs from A toward B; a negative R indicates a path from
    B toward A.  Values close to zero (e.g. +/- 10**-5) mean the causal
    direction could not be determined.
    """
    stdA = standardize(rvA)
    stdB = standardize(rvB)
    # Accumulate a * tanh(b) - b * tanh(a) over the index-aligned samples.
    total = 0
    for idx in range(len(stdA)):
        a, b = stdA[idx], stdB[idx]
        total += a * math.tanh(b) - b * math.tanh(a)
    mean_cumulant = total / float(len(stdA))
    # Scale the mean by the correlation coefficient of the two series.
    rho = np.corrcoef([stdA, stdB])[1, 0]
    return rho * mean_cumulant
예제 #8
0
def solve_LP(LP_matrix, var_constraints, func_constraints, verbose=False):
    """
    Solve a linear program, running a phase-1 pass first when standardization
    introduces phase-1 slack variables.

    :param LP_matrix: the matrix of the LP, size m+1 x n+1
    :param var_constraints: variable constraints, 1: >=0, -1: <= 0, 0: no constraint, size: n
    :param func_constraints: functional constraint, 1: >=0, -1: <= 0, 0: no constraint, size: m
    :param verbose: Bool, if true, additional message is printed
    :return: (solution, solution_value). solution is a n-dimensional array, solution_value is the Z
    value of the solution
    :raises InfeasibleError: if the phase-1 solution value exceeds ``epsilon``
    """
    _, columns = LP_matrix.shape
    # Number of original variables (n); used at the end to trim the solution.
    var_number = columns - 1
    if verbose:
        print("the input of the problem is: LP_matrix = \n{}\nvar_constraints = {}, func_constraints = {}"
              .format(LP_matrix, var_constraints, func_constraints))
    # From here on LP_matrix is the value returned by standardize(), together
    # with the index arrays used below to map the solution back.
    LP_matrix, neg_constraint_vars, no_constraint_vars, nolimit_extra, phase1_slack_vars, bases \
        = standardize(LP_matrix, var_constraints, func_constraints, verbose)
    if phase1_slack_vars.size:
        # Phase 1: save row 0 (all but the last column), then replace it with
        # a row that is 1 on the phase-1 slack columns and 0 elsewhere.
        # Row 0 is presumably the objective row -- confirm against standardize().
        rows, columns = LP_matrix.shape
        c = LP_matrix[0, :columns-1].copy()
        LP_matrix[0] = 0
        LP_matrix[0, phase1_slack_vars] = 1
        if verbose:
            print("enter phase 1, input for phase 1 is \n{}".format(LP_matrix))
        transform_canonical(LP_matrix, bases, verbose)
        solution, solution_value = solve_canonical_LP(LP_matrix, bases, verbose)
        # A phase-1 value above the tolerance is reported as infeasibility.
        if solution_value > epsilon:
            raise InfeasibleError("phase 1 failed. The LP is not feasible")
        # Restore the saved row 0 and zero out the phase-1 slack columns
        # before the phase-2 solve.
        LP_matrix[0, :columns-1] = c
        LP_matrix[:, phase1_slack_vars] = 0
        if verbose:
            print("phase 1 end, input for phase 2 is \n{}".format(LP_matrix))
        transform_canonical(LP_matrix, bases, verbose)
    if verbose:
        print("enter phase 2")
    solution, solution_value = solve_canonical_LP(LP_matrix, bases, verbose)
    # Map the solution back to the original variables: flip the sign of the
    # entries listed in neg_constraint_vars ...
    solution[neg_constraint_vars] = -solution[neg_constraint_vars]
    if no_constraint_vars.size:
        # ... subtract the nolimit_extra entries from the unconstrained ones ...
        solution[no_constraint_vars] -= solution[nolimit_extra]
    # ... and keep only the first var_number entries.
    solution = solution[:var_number]
    if verbose:
        print("original solution is {}, value is {}".format(solution, solution_value))
    return solution, solution_value
    
예제 #9
0
    def __init__(self, rvList, data=None):
        """Build the variable graph and the probability spaces for the model.

        Args:
            rvList: Iterable of variable objects exposing ``name`` and
                ``parentNames``.
            data: Mapping of variable name to its data series.  Defaults to
                an empty dict.

        Raises:
            ValueError: If two entries of ``rvList`` share the same name.
        """
        # A fresh dict per call, so instances never share one mutable default.
        if data is None:
            data = {}
        self.g = networkx.DiGraph()
        self.rvDict = {}
        for rv in rvList:
            if rv.name in self.rvDict:
                # Raising a plain string is itself a TypeError on Python 3.
                raise ValueError('Duplicate variable name = ' + rv.name)
            self.rvDict[rv.name] = rv
        self.rvList = sorted(self.rvDict)
        self.g.add_nodes_from(self.rvDict.keys())
        # One directed edge parent -> child for every declared parent.
        edges = [(pa, rv.name) for rv in rvList for pa in rv.parentNames]
        self.g.add_edges_from(edges)
        self.data = data
        # Index every edge under both of its endpoints.
        self.edgeDict = {}
        for edge in self.g.edges():
            s, d = edge
            self.edgeDict.setdefault(s, []).append(edge)
            self.edgeDict.setdefault(d, []).append(edge)
        # Create a probability space object for later use
        self.prob = Prob.ProbSpace(self.data, power=1)
        # Create a separate standardized probability space for independence testing.
        iData = {var: standardize(series) for var, series in self.data.items()}
        self.iProb = Prob.ProbSpace(iData)

        self.bdCache = {}
        self.fdCache = {}
예제 #10
0
from synth import synthDataGen
from standardize import standardize
import time

# Script configuration.  METHOD is not read in the visible part of this
# script -- presumably it is used further down; confirm.
METHOD = 'prob'
#METHOD = 'fcit'
# Passed as the `power` argument of the probability space built below.
POWER = 1
print('power = ', POWER)
args = sys.argv
# Path of the calibration data set to load.
test = 'Probability/Test/models/indCalibrationDat.csv'

r = getData.DataReader(test)
dat = r.read()
# Standardize every series in place before building the probability space.
# NOTE(review): `vars` shadows the builtin of the same name.
vars = dat.keys()
for var in vars:
    dat[var] = standardize(dat[var])
#print('dat = ', dat)
ps = Prob.ProbSpace(dat, power=POWER)

# List a variety of independent relationships
indeps = [
    ('L1', 'L2'),
    ('L2', 'L3'),
    ('L1', 'L3'),
    ('E1', 'E2'),
    ('N1', 'N2'),
    ('L4', 'L5'),
    ('L5', 'L6'),
    ('L4', 'N3'),
    ('B', 'D', ['A']),
    ('A', 'C', ['B', 'D']),
예제 #11
0
 def TestModel(self, data=None, order=3):
     """Test the model's implied (in)dependencies and causal directions.

     Every dependency from ``self.computeDependencies(order)`` is tested with
     ``independence.test`` on a standardized copy of ``self.data`` and
     classified into a test type:

         0 -- unconditional test between two exogenous variables
         1 -- the pair is expected to be independent
         2 -- the pair is expected to be dependent
         3 -- direction test, from ``self.testAllDirections()``

     Args:
         data: Defaults to ``self.data`` when None.
             NOTE(review): the value is not used after that assignment; the
             tests always run on ``self.data`` -- confirm this is intended.
         order: Forwarded to ``self.computeDependencies``.

     Returns:
         Tuple ``(confidence, numTotalTests, numTestsPerType,
         numErrsPerType, errors, warnings)``.  ``confidence`` is clamped at
         0.0 from below.  ``numTotalTests`` is ``len(deps)``, so it does not
         include the direction tests.  Entries of ``errors`` and ``warnings``
         are tuples ``(testType, [x], [y], z, isDep, value, message)``.
     """
     # Standardize the data before doing independence testing
     warningPenalty = .0025
     iData = {}
     for var in self.data.keys():
         iData[var] = standardize(self.data[var])
     ps = Prob.ProbSpace(iData)
     numTestTypes = 4
     errors = []
     warnings = []
     if data is None:
         data = self.data
     numTestsPerType = [0] * numTestTypes
     numErrsPerType = [0] * numTestTypes
     deps = self.computeDependencies(order)
     if VERBOSE:
         print('Testing Model for', len(deps), 'Independencies')
     for dep in deps:
         # Each dependency is (x, y, conditioning set z, expected-dependent flag).
         x, y, z, isDep = dep
         if z is None:
             z = []
         pval = independence.test(ps, [x], [y], z, power=1)
         # NOTE(review): this print is unconditional (not gated on VERBOSE).
         print(x, y, z)
         errStr = None
         warnStr = None
         testType = -1
         # Classify the test; see the docstring for the meaning of each type.
         if not z and self.isExogenous(x) and self.isExogenous(y):
             testType = 0
         elif not isDep:
             testType = 1
         else:
             testType = 2
         numTestsPerType[testType] += 1
         # A p-value on the wrong side of .5 is a failure; for types 1 and 2
         # it is only a warning unless it is also beyond .25 / .75.
         if testType == 0 and pval < .5:
             errStr = 'Error (Type 0 -- Exogenous variables not independent) -- Expected: ' + self.formatDependency(
                 dep) + ' but dependence was detected. P-val = ' + str(pval)
         elif testType == 2 and pval > .5:
             if pval > .75:
                 errStr = 'Error (Type 2 -- Unexpected independence) -- Expected: ' + self.formatDependency(
                     dep) + ' but no dependence detected.  P-val = ' + str(
                         pval)
             else:
                 warnStr = 'Warning (Type 2 -- Unexpected independence) -- Expected: ' + self.formatDependency(
                     dep
                 ) + ' but minimal dependence detected.  P-val = ' + str(
                     pval)
         elif testType == 1 and pval < .5:
             if pval < .25:
                 errStr = 'Error (Type 1 -- Unexpected dependence) -- Expected: ' + self.formatDependency(
                     dep) + ' but dependence was detected. P-val = ' + str(
                         pval)
             else:
                 warnStr = 'Warning (Type 1 -- Unexpected dependence) -- Expected: ' + self.formatDependency(
                     dep
                 ) + ' but some dependence was detected. P-val = ' + str(
                     pval)
         if errStr:
             if VERBOSE:
                 print('***', errStr)
                 #5/0
             errors.append(
                 (testType, [x], [y], list(z), isDep, pval, errStr))
             numErrsPerType[testType] += 1
         # The elif below pairs with `if warnStr`, so in VERBOSE mode the '.'
         # is printed whenever there is no warning, including on errors.
         if warnStr:
             if VERBOSE:
                 print('*', warnStr)
                 #5/0
             warnings.append(
                 (testType, [x], [y], list(z), isDep, pval, warnStr))
         elif VERBOSE:
             print('.', )
     # Now test directionalities
     testType = 3
     dresults = self.testAllDirections()
     derrs = 0
     for dresult in dresults:
         isError, cause, effect, rho = dresult
         if isError:
             derrs += 1
             # A near-zero rho is recorded as a warning (direction could not
             # be verified); any other failing rho is an error.
             if abs(rho) < .0001:
                 resStr = 'True direction could not be verified.'
                 warnStr = 'Warning (Type 3 -- Incorrect Causal Direction) between ' + cause + ' and ' + effect + '. ' + resStr + '.  Rho = ' + str(
                     rho)
                 warnings.append(
                     (testType, [cause], [effect], [], False, rho, warnStr))
                 if VERBOSE:
                     print('*', warnStr)
             else:
                 resStr = 'Direction appears to be reversed.'
                 errStr = 'Error (Type 3 -- Incorrect Causal Direction) between ' + cause + ' and ' + effect + '. ' + resStr + '.  Rho = ' + str(
                     rho)
                 errors.append(
                     (testType, [cause], [effect], [], False, rho, errStr))
                 if VERBOSE:
                     print('***', errStr)
                 numErrsPerType[testType] += 1
         numTestsPerType[testType] += 1
     # Confidence starts at 1.0; each test type subtracts its error ratio,
     # weighted by its penalty and divided by the number of test types.
     confidence = 1.0
     failurePenaltyPerType = [1, 1, 1, 1]
     errorRatios = [0.0] * numTestTypes
     for i in range(numTestTypes):
         nTests = numTestsPerType[i]
         nErrs = numErrsPerType[i]
         if nTests > 0:
             ratio = nErrs / nTests
             errorRatios[i] = ratio
             confidence -= ratio * failurePenaltyPerType[i] / numTestTypes
     # Every warning costs a further fixed penalty; the result is floored at 0.
     warnPenalty = len(warnings) * warningPenalty
     confidence -= warnPenalty
     confidence = max([confidence, 0.0])
     numTotalTests = len(deps)
     if VERBOSE:
         print('Model Testing Completed with', len(errors), 'error(s) and',
               len(warnings), ' warning(s).  Confidence = ',
               round(confidence * 100, 1), '%')
     return (confidence, numTotalTests, numTestsPerType, numErrsPerType,
             errors, warnings)
예제 #12
0
def _etl(raw, extracted, parsed, standardized):
    """Run the extract, parse and standardize stages in sequence.

    Each stage is called with the previous stage's second argument as its
    first: ``raw -> extracted -> parsed -> standardized``.  The arguments are
    presumably source/destination locations -- confirm against the stage
    functions.  Nothing is returned.
    """
    extract(raw, extracted)
    parse(extracted, parsed)
    standardize(parsed, standardized)
예제 #13
0
    Return: your best m x 1 numpy array weight vector used to predict the output for the
            kaggle competition.

            The matrix product of the feature_matrix, obtained from get_feature_matrix()
            call with file as test_features.csv, with this weight vector should
            result in you best prediction for the test dataset.

    """
    with open('my_w_best', 'rb') as f:
        w_best = pickle.load(f)
    return w_best


# Script section: predict outputs for the test features with the saved best
# weight vector and write them out via save_as_csv.
check = True  # Stays True unless get_weight_vector (below) reports an invalid p
feature_matrix = get_feature_matrix('train.csv')
y_train = get_output('train.csv')
lambda_reg = 3  # Change value of regularization parameter here
p = 1.5  # Change value of p here
# To try a specific p and regularization parameter (lambda_reg), uncomment the
# get_weight_vector call below and comment out the get_my_best_weight_vector()
# call inside the `if` (presumably the lines the original note referred to by
# number -- confirm).
#w,check=get_weight_vector(feature_matrix, y_train, lambda_reg, p)
if (check):
    w = get_my_best_weight_vector()
    x_test = give_features('test_features.csv')
    x_test = standardize(x_test)
    output_values = np.dot(x_test, w)
    # Predictions are cast to integers before being saved.
    y_test = (output_values).astype('int')
    msg = save_as_csv(y_test)
    print(msg)
else:
    print('Invalid p given. 1<=p<=2')