def kurtosis(str, list):
    """Compute the sample excess kurtosis of the given CSV columns.

    Reads the columns named in ``list`` from the CSV file at path ``str``
    (the parameter names shadow builtins; kept unchanged for interface
    compatibility), prints the result, and returns it as a Series with
    one value per column.

    Uses the standard bias-corrected estimator:
        G2 = [n(n+1) / ((n-1)(n-2)(n-3))] * sum(z**4) - 3(n-1)^2 / ((n-2)(n-3))
    """
    w = pd.read_csv(str, usecols=list)
    frame = DataFrame(w)

    n = len(w)
    print(n)

    mean = frame.mean()
    # Sample standard deviation (ddof=1), as required by the estimator.
    std = frame.std()

    # Fourth power of the z-scores, summed per column.
    g = (((w - mean) / std) ** 4).sum()

    p1 = n * (n + 1)
    p2 = float((n - 1) * (n - 2) * (n - 3))
    p3 = float(3 * ((n - 1) ** 2))
    p4 = (n - 2) * (n - 3)

    kurt = ((p1 / p2) * g) - (p3 / p4)

    print("kurtosis=", kurt)
    return kurt
Example #2
0
    def testWLS(self):
        """Check weighted least squares on random data, including rows with
        missing weights/labels to exercise the NaN-handling path."""
        X = DataFrame(np.random.randn(30, 4), columns=["A", "B", "C", "D"])
        Y = Series(np.random.randn(30))
        weights = X.std(1)

        self._check_wls(X, Y, weights)

        # .ix was deprecated and removed from pandas; .loc is the
        # label-based equivalent (the default RangeIndex makes labels
        # 5 and 15 the same rows here).
        weights.loc[[5, 15]] = np.nan
        Y[[2, 21]] = np.nan
        self._check_wls(X, Y, weights)
def stndize(str, list):
    """Z-score standardise the given CSV columns.

    Reads the columns named in ``list`` from the CSV file at path ``str``
    (the parameter names shadow builtins; kept unchanged for interface
    compatibility), prints the column means, sample standard deviations
    and the standardised values, and returns the standardised DataFrame.
    """
    w = pd.read_csv(str, usecols=list)
    frame = DataFrame(w)

    mean = frame.mean()
    print(mean)
    # Sample standard deviation (ddof=1).
    std = frame.std()
    print(std)

    z = (w - mean) / std
    print(z)

    return z
Example #4
0
def combine_spread(file_set, shift, drop_return_data=False):
    """
    Combine the spread of input files, return with mean and standard
    deviation calculated.

    """

    fields = ("left", "right", "com", "dist", "radius", "diameter")
    data = []
    values = {field: {} for field in fields}

    # Read every file, collecting each field as a Series indexed by the
    # recorded (unshifted) times; then shift the file's own time axis by
    # its synchronisation offset.
    for num, filename in enumerate(file_set):
        data.append(Spread().read(filename))
        for field in fields:
            values[field][num] = Series(data=data[num].spread[field]["val"], index=data[num].times)
        data[num].times = np.array(data[num].times) - shift[num]

    spread = Spread()
    spread.spread["num"] = len(file_set)

    for field in fields:

        # Apply the same synchronisation shift to each collected Series.
        for num in values[field]:
            values[field][num].index = np.array(values[field][num].index) - shift[num]

        # Align all files on a common time index.
        df = DataFrame(data=values[field])

        # With several files, keep only rows without missing values.
        if len(file_set) > 1:
            df = df.dropna()

        # Optionally write the aligned data back into the Spread objects.
        if drop_return_data:
            for num in df.columns:
                data[num].spread[field]["val"] = df[num].tolist()

        # Store mean, standard deviation and times on the result object.
        spread.spread[field]["val"] = list(df.mean(axis=1))
        spread.spread[field]["std"] = list(df.std(axis=1))
        spread.spread["times"] = list(df.index)

    return spread, data
Example #5
0
    def testWLS(self):
        # WLS centered SS changed (fixed) in 0.5.0
        # Compare versions numerically: a plain string comparison is wrong
        # (e.g. "0.10.0" < "0.5.0" is True lexicographically).
        from distutils.version import LooseVersion

        if LooseVersion(sm.version.version) < LooseVersion("0.5.0"):
            raise nose.SkipTest

        X = DataFrame(np.random.randn(30, 4), columns=["A", "B", "C", "D"])
        Y = Series(np.random.randn(30))
        weights = X.std(1)

        self._check_wls(X, Y, weights)

        # .loc replaces the removed pandas .ix indexer (labels 5 and 15
        # equal positions here because of the default RangeIndex).
        weights.loc[[5, 15]] = np.nan
        Y[[2, 21]] = np.nan
        self._check_wls(X, Y, weights)
Example #6
0
    def testWLS(self):
        # WLS centered SS changed (fixed) in 0.5.0
        sm_version = sm.version.version
        # Wrap both sides in LooseVersion so the comparison is symmetric
        # and unambiguously numeric rather than relying on reflected ops.
        if LooseVersion(sm_version) < LooseVersion("0.5.0"):
            raise nose.SkipTest("WLS centered SS not fixed in statsmodels" " version {0}".format(sm_version))

        X = DataFrame(np.random.randn(30, 4), columns=["A", "B", "C", "D"])
        Y = Series(np.random.randn(30))
        weights = X.std(1)

        self._check_wls(X, Y, weights)

        # .loc replaces the removed pandas .ix indexer.
        weights.loc[[5, 15]] = np.nan
        Y[[2, 21]] = np.nan
        self._check_wls(X, Y, weights)
Example #7
0
    def testWLS(self):
        # WLS centered SS changed (fixed) in 0.5.0
        # Compare versions numerically: a plain string comparison is wrong
        # (e.g. "0.10.0" < "0.5.0" is True lexicographically).
        from distutils.version import LooseVersion

        if LooseVersion(sm.version.version) < LooseVersion("0.5.0"):
            raise nose.SkipTest

        print("Make sure you're using statsmodels 0.5.0.dev-cec4f26 or later.")

        X = DataFrame(np.random.randn(30, 4), columns=["A", "B", "C", "D"])
        Y = Series(np.random.randn(30))
        weights = X.std(1)

        self._check_wls(X, Y, weights)

        # .loc replaces the removed pandas .ix indexer.
        weights.loc[[5, 15]] = np.nan
        Y[[2, 21]] = np.nan
        self._check_wls(X, Y, weights)
def skewness(str, list):
    """Compute the adjusted sample skewness (G1) of the given CSV columns.

    Reads the columns named in ``list`` from the CSV file at path ``str``
    (the parameter names shadow builtins; kept unchanged for interface
    compatibility), prints the result, and returns it as a Series with
    one value per column.

    Estimator: G1 = n / ((n-1)(n-2)) * sum(z**3).
    """
    w = pd.read_csv(str, usecols=list)
    frame = DataFrame(w)

    n = len(w)

    mean = frame.mean()
    # Sample standard deviation (ddof=1), as required by the estimator.
    std = frame.std()

    # Third power of the z-scores, summed per column.
    g = (((w - mean) / std) ** 3).sum()

    skew = (n * g) / ((n - 1) * (n - 2))

    print("skewness=", skew)
    return skew
Example #9
0
class GetGenes(object):
    """Load a TAB-separated expression table and report the most stable
    ("housekeeping") genes, i.e. the rows with the smallest absolute
    coefficient of variation across samples."""

    def __init__(self, data):
        # Accept anything the DataFrame constructor understands.
        self.dataframe = DataFrame(data)

        # read a text file and return a data frame. Records should be separated by TAB
        # There should not be duplicate column names

    def import_file(self, filename):
        """Read a TAB-separated file into self.dataframe.

        The first non-empty line supplies the column names (they should be
        unique); numeric fields are converted to float, everything else is
        kept as a string. Empty lines are skipped.
        """

        # this function use to convert string to float
        def convert(x):
            try:
                x = float(x)
            except ValueError:
                pass  # non-numeric fields stay as strings
            return x

        table = []
        for line in open(filename):
            if line.strip():  # If not empty line
                fields = line.rstrip("\n").split("\t")
                table.append(list(map(convert, fields)))
        self.dataframe = DataFrame(table[1:], columns=table[0])
        return

    def houseKeepingGenes(self, geneNum):
        """Print the first column of the geneNum rows whose expression has
        the smallest absolute coefficient of variation (std / mean)."""
        # compute the CV of data
        std = array(self.dataframe.std(axis=1))
        mean = array(self.dataframe.mean(axis=1))
        CV = std / mean
        CV = list(map(abs, CV))  # convert to positive number

        # get the first N minimum values
        mins = nsmallest(geneNum, CV)
        print("The GOOD genes are:\n")
        for item in mins:
            # CV.index(item) is the positional row index; the removed
            # pandas .ix indexer (and the deprecated positional [0] lookup
            # on a labeled row) are replaced with explicit .iloc.
            print(self.dataframe.iloc[CV.index(item)].iloc[0])
        return
Example #10
0
)
experiment_data_Raw = experiment_data_Raw.set_index("Timestamp")

final_data = concat([experiment_data_Qber, experiment_data_Raw])

final_data = final_data.sort_index()

# after prepaired data, time to plot it:

for new_counter in range(file_counter + 1):
    # print new_counter
    Qbers = final_data[(final_data["Dataset"] == new_counter) & (final_data["Qber"] > 0)]
    x1 = Qbers.index.tolist()
    y1 = Qbers["Qber"].tolist()
    x1_average = DataFrame.mean(Qbers)["Qber"]
    x1_std_dev = DataFrame.std(Qbers)["Qber"]
    # prepairing proper time:
    x1[:] = [x - quelle_initialTimestamps[new_counter] for x in x1]

    Raws = final_data[(final_data["Dataset"] == new_counter) & (final_data["Raw key"] > 0)]
    x2_average = DataFrame.mean(Raws)["Raw key"]
    x2_median = DataFrame.median(Raws)["Raw key"]
    x2_max = DataFrame.max(Raws)["Raw key"]

    Raws = Raws[Raws["Raw key"] < (x2_max - (x2_max / 100) * 20)]

    x2 = Raws.index.tolist()
    y2 = Raws["Raw key"].tolist()

    print x2_average
    # x2_std_dev = 3
        int(ser_max / 5),
        int(ser_max / 2),
        ser_max,
    ],
)
for i in periods_test.index:  # Sampling 20 times
    for j in periods_test.columns:
        sample = test.reindex(columns=np.random.permutation(test.columns)[:j])
        periods_test.ix[i, j] = sample.iloc[0].corr(sample.iloc[1])  # ix is for label index, iloc is for int index
print periods_test[:5]
print periods_test.describe()

threshold = 0.1
temp_std = 0
# Take the threshold num which makes sampling correlation stable
for i, std in enumerate(periods_test.std()):
    if std < 0.1 and temp_std >= 0.1:
        mini_period = periods_test.columns[i]
        break
    temp_std = std

# Decide the value of min_periods. Set std 0.05 as threshold
# mini_period = 200
check_size = int(len(data.index) * 0.2)  # 20% dataset for testing
check = {}
check_data = data.copy()  # Avoid the changes on original data
check_data = check_data.ix[
    check_data.count(axis=1) > mini_period
]  # Filter users with few ratings. If there is no axis, the sum is the whole matrix
for user in np.random.permutation(check_data.index):
    movie = np.random.permutation(check_data.ix[user].dropna().index)[0]
Example #12
0
import numpy as np
from pandas import DataFrame, Series
import pandas as pd
import matplotlib.pyplot as plt

# 6x5 frame of positive random values scaled into roughly [0, 300).
df = DataFrame(abs(np.random.randn(30).reshape(6, 5)) * 100)

# Bar chart of the column means.
plt.bar(np.arange(len(df.mean())), df.mean(), align="center", color="white", linewidth=1.5)

# plt.hold() was deprecated and then removed from matplotlib; the hold
# state defaults to True, so drawing again simply overlays the artists.

# fmt="none" (a string) suppresses the data line; passing the None object
# was deprecated in matplotlib and later removed.
plt.errorbar(np.arange(len(df.mean())), df.mean(), df.std(), elinewidth=1.2, capsize=7.5, fmt="none")
plt.show()
Example #13
0
import numpy as np
from pandas import DataFrame, Series
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats

# Random 6x5 table of positive values, scaled up by 100.
df = DataFrame(abs(np.random.randn(30).reshape(6, 5)) * 100)

means = df.mean()
positions = np.arange(len(means))

# Column means drawn as white bars, with the column standard deviations
# shown as black error bars.
plt.bar(
    positions,
    means,
    align="center",
    color="white",
    yerr=df.std(),
    ecolor="black",
    capsize=5,
    linewidth=1,
)
plt.grid()


plt.show()
Example #14
0
def test():
    """Estimate per-person housing cost components from household equations.

    Unknowns (7): a adulte isolé, b couple, c enfant dans couple,
    d enfant isolé, e ado couple, f ado isolé, g chambre d'enfant.

    Each household (A..H) contributes one linear equation f*x = m; a
    supplementary constraint (a = b / 1.5) closes the otherwise
    inconsistent system. Every 7x7 subsystem obtained by dropping two of
    the nine equations is solved; the solutions, their mean, standard
    deviation and relative spread are printed.

    Returns the DataFrame of solutions, one row per solvable (i, j)
    pair and one column per unknown.
    """
    # Hoisted out of the inner loop: importing per iteration is wasteful.
    from pandas import concat

    # A: 2a,2e
    #  b + 2*c + g
    fa = [0, 1, 2, 0, 0, 0, 1]
    ma = 2754.74

    # B : 2a,2ea,supp:
    #  b + 2*e + 2*g
    fb = [0, 1, 0, 0, 2, 0, 2]
    mb = 3165.15

    # C : 1a,2e:
    #  a + 2*d + g
    fc = [1, 0, 0, 2, 0, 0, 1]
    mc = 2291.04

    # D: 2a, 2e, 2ea, 2*supp :
    #   b + 2*c + 2*e + 3*g
    fd = [0, 1, 2, 0, 2, 0, 3]
    md = 3969.81

    # E : 2a,1ea
    #    b + e + g
    fe = [0, 1, 0, 0, 1, 0, 1]
    me = 2549.17

    # F : 2a, 1e, 2ea
    #    b + c + 2*e + 2*g
    ff = [0, 1, 1, 0, 2, 0, 2]
    mf = 3514.12

    # G: 2a, 1e ,1ea, supp
    #   b + c + e + 2*g
    fg = [0, 1, 1, 0, 1, 0, 2]
    mg = 3042.39

    # H: 1a, 1ea
    #    a + f + g
    fh = [1, 0, 0, 0, 0, 1, 1]
    mh = 2103.91

    # solve f*x = m

    # A supplementary equation is needed because the system is inconsistant
    fsup = [1, -1 / 1.5, 0, 0, 0, 0, 0]
    msup = 0
    f = [fa, fb, fc, fd, fe, ff, fg, fh, fsup]
    m = [ma, mb, mc, md, me, mf, mg, mh, msup]

    results = DataFrame()

    # Drop two equations (indices i, then j of the remainder) and solve
    # the resulting square 7x7 system.
    for i in range(8):
        selected_f1 = list(f)
        selected_m1 = list(m)
        selected_f1.pop(i)
        selected_m1.pop(i)
        for j in range(7):
            selected_f = list(selected_f1)
            selected_m = list(selected_m1)
            selected_f.pop(j)
            selected_m.pop(j)

            f_mat = np.array(selected_f)
            m_vec = np.array(selected_m)

            # Singular subsystems are skipped; catch only the linear
            # algebra failure instead of silencing every exception.
            try:
                x = DataFrame({str(i) + str(j): np.linalg.solve(f_mat, m_vec)}).T
            except np.linalg.LinAlgError:
                continue

            results = concat([results, x])

    print(results)
    print(results.mean())
    print(results.std())
    print(results.std() / results.mean())
    return results
Example #15
0
    def run(
        self,
        Model="ridge",
        kernel="linear",
        cross_validationMethod="KFold",
        FeatureSelection="PCA",
        n_features=20,
        scoringList=["specificity", "sensitivity", "precision", "f1", "accuracy", "ss_mean"],
        isSaveCsv=None,
        isSavePickle=None,
        isSaveFig=None,
        isPerm=0,
        isBetweenSubjects=True,
        isConcatTwoLabels=False,
    ):
        # -- TODO :
        # --  # Greedy selection on features + Other feature selection types...
        # --  # Make sure featuers are Best only based on train data!!!
        # --  # Keep a list of n_train, n_test from each Label and scoring (accuracy, f1..) in each cross validation iteration
        # --  # Plot results summary (see CARS paper for desired results for Ein Gedi Poster 22-1-2015)
        # --  # remove irelevant data using 'Tracking Success' and consider 'TimeStamps' for feature calculation
        # --  # add f feature analysis by facial part (see excel)
        # --  # select best model (svm, otherwise ridge regression)
        # --  # compare svc results with regerssion results (using LOO and different Params for regression  - params for unbalanced data, different kernels, etc.), model evaluation - http://scikit-learn.org/stable/modules/model_evaluation.html)
        # --  # check how the model weights behave - feature selection analysis
        # --  # calc model error
        # --  # divide data to subparts for training and testing - try within/ between subject, and analyze distribution of features when data is divided
        # --  # LOO - also on bool labels (patients vs controls and mental status bool)
        # --  # add mental status rank scores (0-4)
        # --  # make sure p-val returns the right value in 'scores'
        # --  # run it over random data (permutation test)
        # --  # continoue here - check regression results-Make sure regression works (not so good).. check what happens in svc for G7 (high train R, negative test R)

        ## init
        FeatureTypeList = [j for j in tuple(self.FeaturesDF.index)]
        self.FullResults = DF()
        self.Learningdetails = {
            "Model": Model,
            "Kernel": kernel,
            "CrossVal": cross_validationMethod,
            "FeatureSelection": FeatureSelection,
            "LabelBy": self.Details["LabelDetails"].keys()[0],
            "FeatureMethod": self.Details["FeatureMethod"],
            "PieceLength": self.Details["PieceLength"],
        }
        print("\n------------Learning Details------------")
        print(DF.from_dict(self.Learningdetails, orient="index"))
        print("\n----" + cross_validationMethod + " Cross validation Results:----")

        # Set learning params (cross validation method, and model for learning)
        isBoolLabel = self.LabelsObject.isBoolLabel
        isBoolScores = isBoolLabel
        model, isBoolModel, featureSelectionMethod, selectFeaturesFunction = learningUtils.setModel(
            Model, FeatureSelection, n_features
        )
        # define global variables over modules (to be used in myUtils)
        globalVars.transformMargins = 0  # lambda x:x
        globalVars.isBoolLabel = isBoolLabel
        globalVars.isBoolModel = isBoolModel
        global trainLabels_all, testLabels_all, TrueLabels, isAddDroppedSubjects
        trainLabels_all, testLabels_all, TrueLabels, isAddDroppedSubjects = labelUtils.initTrainTestLabels_all(
            self.LabelsObject
        )
        trainLabels_all2, testLabels_all2, TrueLabels2, isAddDroppedSubjects2 = labelUtils.initTrainTestLabels_all(
            self.LabelsObject2
        )

        LabelingList = ["N1"]  # trainLabels_all.columns
        self.ResultsDF = DF()
        self.BestFeatures = DF(columns=LabelingList)  # dict of BestFeaturesDF according to Labeling methods
        YpredictedOverAllLabels = pandas.Panel(
            items=range(len(trainLabels_all)), major_axis=LabelingList, minor_axis=TrueLabels.index
        )  # panel: items=cv_ind, major=labels, minor=#TODO

        ## Create train and test sets according to LabelBy, repeat learning each time on different Labels from LabelingList.
        for label_ind, Labeling in enumerate(LabelingList):
            """if isPerm: #TODO - fix this to work with continous / bool data
                try:
                    trainLabels=self.LabelsObject.permedLabelsDF[Labeling]
                except AttributeError:
                    self.LabelsObject.permLabels()
                    trainLabels=self.LabelsObject.permedLabelsDF[Labeling]"""
            # set subjects list according to labels and features
            X, SubjectsList, droppedSubjects, Xdropped = featuresUtils.initX(self.FeaturesDF, trainLabels_all, Labeling)
            X2, SubjectsList2, droppedSubjects2, Xdropped2 = featuresUtils.initX(
                self.FeaturesDF, trainLabels_all2, Labeling, is2=1
            )

            # init train and test labels
            trainLabels, testLabels, LabelRange = labelUtils.initTrainTestLabels(
                Labeling, SubjectsList, trainLabels_all, testLabels_all
            )
            trainLabels2, testLabels2, LabelRange2 = labelUtils.initTrainTestLabels(
                Labeling, SubjectsList2, trainLabels_all2, testLabels_all2
            )

            # make sure only labeled subjects are used for classification
            X = X.query("subject == " + str(list(trainLabels.index)))
            X.index.get_level_values(X.index.names[0])
            SubjectIndex = list(set(X.index.get_level_values("subject")))

            X2 = X2.query("subject == " + str(list(trainLabels2.index)))
            X2.index.get_level_values(X2.index.names[0])
            SubjectIndex2 = list(set(X2.index.get_level_values("subject")))
            # init vars
            if isBetweenSubjects:
                cv_param = len(SubjectIndex)
                self.Learningdetails["CrossValSubjects"] = "between"
                isWithinSubjects = False
            else:
                isWithinSubjects = True
                X = X.swaplevel(0, 1)
                PieceIndex = list(set(X.index.get_level_values("Piece_ind")))
                cv_param = len(PieceIndex)
                self.Learningdetails["CrossValSubjects"] = "within"

            self.Learningdetails["NumOfFeatures"] = n_features

            print("\n**" + Labeling + "**")

            cv, crossValScores = learningUtils.setCrossValidation(
                cross_validationMethod, cv_param, trainLabels, isWithinSubjects
            )

            ## Learning - feature selection for different scoring types, with cross validation -

            BestFeaturesForLabel = self.BestFeaturesForLabel(
                FeatureTypeList, LabelingList, n_features
            )  # saves dataframe with best features for each label, for later analysis
            cv_ind = 0
            # used for transforming from margins returned from svm to continouse labels (e.g . PANSS)
            trainScores = DF()
            test_index = X.index
            testScores = concat([DF(index=test_index), DF(index=["std_train_err"])])
            testScores2 = concat([DF(index=testLabels.index), DF(index=["std_train_err"])])
            # impt=Imputer(missing_values='NaN', strategy='median', axis=0)

            globalVars.LabelRange = LabelRange

            ModelWeights1 = DF(columns=range(len(cv)), index=X.columns)
            Components = pandas.Panel(
                items=range(len(cv)), major_axis=X.columns, minor_axis=range(n_features)
            )  # todo fix this for 1st and second learning
            ExplainedVar = DF(columns=range(len(cv)))
            ModelWeights2 = DF(columns=range(len(cv)))
            for train, test in cv:

                if isBetweenSubjects:
                    # set X and Y
                    train_subjects = trainLabels.iloc[train].index
                    test_subjects = testLabels.iloc[test].index
                    Xtrain, Xtest, Ytrain, YtrainTrue, Ytest = learningUtils.setXYTrainXYTest(
                        X, Labeling, trainLabels, testLabels, TrueLabels, train_subjects, test_subjects
                    )
                    Xtrain2, Xtest2, Ytrain2, YtrainTrue2, Ytest2 = learningUtils.setXYTrainXYTest(
                        X2, Labeling, trainLabels2, testLabels2, TrueLabels2, train_subjects, test_subjects
                    )

                    if isConcatTwoLabels:  # used when there is more than one doctor
                        Xtrain = concat([Xtrain, Xtrain2])
                        Xtest = concat([Xtest, Xtest2])
                        Ytrain = concat([Ytrain, Ytrain2])
                        YtrainTrue = concat([YtrainTrue, YtrainTrue2])
                        Ytest = concat([Ytest, Ytest2])
                        Xdropped = concat([Xdropped, Xdropped2])
                        SubjectsList = list(set(SubjectsList).intersection(set(SubjectsList2)))
                        droppedSubjects = list(
                            set(droppedSubjects).union(set(droppedSubjects2)).difference(set(SubjectsList))
                        )  # diff from SubjectsList to make sure no subjects are both in train and test.
                    """else:
                        Xtrain=Xtrain1
                        Xtest=Xtest1
                        Xdropped=Xdropped1
                        Ytrain=Ytrain1
                        YtrainTrue=YtrainTrue1
                        Ytest=Ytest1"""

                    # select N best features:
                    Xtrain, Xtest, bestNfeatures, components, explainedVar, decomposeFunc = learningUtils.selectBestNfeatures(
                        Xtrain, Xtest, Ytrain, n_features, selectFeaturesFunction
                    )
                    BestFeaturesForLabel.add(bestNfeatures)  # todo - delete this??

                    # train 1
                    TrainModel = model
                    TrainModel.fit(Xtrain.sort_index(), Ytrain.T.sort_index())
                    try:
                        Components[cv_ind] = components.T
                        ExplainedVar[cv_ind] = explainedVar
                        isDecompose = True
                        if cv_ind == 0:
                            ModelWeights1 = DF(columns=range(len(cv)), index=range(len(bestNfeatures)))
                        ModelWeights1[cv_ind] = TrainModel.coef_.flatten()
                    except AttributeError:
                        isDecompose = False
                        ModelWeights1[cv_ind].loc[bestNfeatures] = TrainModel.coef_.flatten()
                    self.isDecompose = isDecompose
                    # train 2
                    if isBoolLabel:
                        PiecePrediction_train = DF(
                            TrainModel.predict(Xtrain), index=Xtrain.index, columns=["prediction"]
                        )
                        TrainModel2 = svm.SVC(kernel="linear", probability=True, class_weight={0: 1, 1: 1})
                    else:
                        PiecePrediction_train = DF(
                            TrainModel.decision_function(Xtrain), index=Xtrain.index, columns=["prediction"]
                        )
                        TrainModel2 = linear_model.LinearRegression()

                    Xtrain2, Ytrain2, YtrainTrue2 = learningUtils.getX2Y2(
                        Xtrain, Ytrain, YtrainTrue, PiecePrediction_train, isBoolLabel
                    )
                    TrainModel2.fit(Xtrain2, Ytrain2)
                    if cv_ind == 0:
                        ModelWeights2 = DF(columns=range(len(cv)), index=Xtrain2.columns)
                    ModelWeights2[cv_ind] = TrainModel2.coef_.flatten()

                    # test 1
                    if (
                        isAddDroppedSubjects
                    ):  # take test subjects from cv + subjects that were dropped for labeling used for test
                        if isDecompose:
                            dXdropped = DF(decomposeFunc(Xdropped).values, index=Xdropped.index)
                        XtestDropped = dXdropped[bestNfeatures]
                        YtestDropped = Series(XtestDropped.copy().icol(0))
                        # YTrueDropped=Series(Xdropped.copy().icol(0))
                        for subject in droppedSubjects:
                            YtestDropped[subject] = testLabels_all[Labeling].loc[subject]
                            # YTrueAll.loc[subject]=TrueLabels[Labeling].loc[subject]
                        Ytest = concat([Ytest, YtestDropped]).sort_index()
                        Xtest = concat([Xtest, XtestDropped]).sort_index()

                    if isPerm:  # TODO- Check this!!
                        Ytest = y_perms.loc[Ytest.index]
                    Xtest = Xtest.fillna(0.0)

                elif isWithinSubjects:
                    # train 1
                    train_pieces = PieceIndex[train]
                    test_pieces = PieceIndex[
                        test
                    ]  # TODO - make sure that if test/train> piece index, it ignores it and repeate the process

                    XtrainAllFeatures = X.query("Piece_ind == " + str(list(train_pieces)))
                    Ytrain = Series(index=X.index)
                    Ytest = Series(index=X.index)
                    YtrainTrue = Series(index=X.index)

                    for subject in PieceIndex:
                        for piece in train_pieces:
                            Ytrain.loc[piece].loc[subject] = trainLabels[subject]
                            YtrainTrue.loc[piece].loc[subject] = TrueLabels[Labeling].loc[subject]
                            Ytest.loc[piece].loc[subject] = testLabels[subject]
                    Ytrain = Ytrain.dropna()
                    YtrainTrue = YtrainTrue.dropna()
                    for subject in test_subjects:
                        Ytest.loc[piece].loc[subject] = testLabels[subject]
                # train scores 1
                if cv_ind == 0:
                    trainScores, YtrainPredicted = learningUtils.getTrainScores(Ytrain, Xtrain, YtrainTrue, TrainModel)
                    plt.figure(1)
                    if len(LabelingList) > 1:
                        plt.subplot(round(len(LabelingList) / 2), 2, label_ind + 1)
                    if isBoolLabel:
                        testScores = learningUtils.getTestScores(Ytest, Xtest, TrainModel)
                    else:
                        testScores[cv_ind] = learningUtils.getTestScores(Ytest, Xtest, TrainModel)
                        plt.title(Labeling, fontsize=10)
                else:
                    plt.figure(3)
                    new_trainScores, YtrainPredicted = learningUtils.getTrainScores(
                        Ytrain, Xtrain, YtrainTrue, TrainModel
                    )
                    trainScores = concat([trainScores, new_trainScores], axis=1)
                    # test 1
                    testScores[cv_ind] = learningUtils.getTestScores(Ytest, Xtest, TrainModel)

                # train2

                if isBoolLabel:
                    PiecePrediction_test = DF(TrainModel.predict(Xtest), index=Xtest.index, columns=["prediction"])
                else:
                    PiecePrediction_test = DF(
                        TrainModel.decision_function(Xtest), index=Xtest.index, columns=["prediction"]
                    )
                Xtest2, Ytest2, YtestTrue2 = learningUtils.getX2Y2(
                    Xtest, Ytest, Ytest, PiecePrediction_test, isBoolLabel
                )

                if cv_ind == 0:
                    trainScores2, YtrainPredicted2 = learningUtils.getTrainScores(
                        Ytrain2, Xtrain2, YtrainTrue2, TrainModel2
                    )
                    YpredictedOverAllLabels[cv_ind].loc[Labeling] = YtrainPredicted2
                    # plt.figure(1)
                    # if len(LabelingList)>1:
                    # plt.subplot(round(len(LabelingList)/2),2,label_ind+1)
                    # test2
                    if isBoolLabel:
                        testScores2 = learningUtils.getTestScores(Ytest2, Xtest2, TrainModel2)
                    else:
                        testScores2[cv_ind] = learningUtils.getTestScores(Ytest2, Xtest2, TrainModel2)
                    # plt.title(Labeling,fontsize=10)
                else:
                    new_trainScores2, YtrainPredicted2 = learningUtils.getTrainScores(
                        Ytrain2, Xtrain2, YtrainTrue2, TrainModel2
                    )
                    YpredictedOverAllLabels[cv_ind].loc[Labeling] = YtrainPredicted2
                    trainScores2 = concat([trainScores2, new_trainScores2], axis=1)
                    testScores2[cv_ind] = learningUtils.getTestScores(Ytest2, Xtest2, TrainModel2)
                cv_ind += 1

                # crossValScores=crossValScores.append(CVscoresDF,ignore_index=True) #information about entire train test data.
            fig2 = plt.figure(2)
            if len(LabelingList) > 1:
                plt.subplot(round(len(LabelingList) / 2), 2, label_ind + 1)
            # if isAddDroppedSubjects:
            # testLabelsSummary=testLabels_all[Labeling].loc[AllSubjects]
            # else:
            # testLabelsSummary=testLabels
            scoresSummary = learningUtils.getScoresSummary(trainScores2, testScores2, TrueLabels[Labeling])
            # reset global vars
            globalVars.fitYscale = "notDefined"
            globalVars.beta = DF()

            plt.title(Labeling, fontsize=10)
            plt.xlabel("Ytrue", fontsize=8)
            plt.ylabel("Ypredicted", fontsize=8)
            plt.tick_params(labelsize=6)
            # print(crossValScores.T)
            scores = scoresSummary.fillna(0.0)

            # analyze feature weightsL

            WeightedFeatures1 = DF(
                [ModelWeights1.mean(axis=1), ModelWeights1.std(axis=1)], index=["mean", "std"]
            ).T.fillna(0)
            if isDecompose == 0:
                WeightedFeatures1FeatureType = WeightedFeatures1.mean(level="FeatureType")
                WeightedFeatures1FsSingal = WeightedFeatures1.mean(level="fs-signal")
                WeightedFeatures1 = concat(
                    [
                        DF(index=["-------(A) FeatureType-------"]),
                        WeightedFeatures1FeatureType,
                        DF(index=["-------(B) faceshift signal-------"]),
                        WeightedFeatures1FsSingal,
                    ]
                )

            WeightedFeatures2 = DF(
                [ModelWeights2.mean(axis=1), ModelWeights2.std(axis=1)], index=["mean", "std"]
            ).T.fillna(0)
            BestFeatures = concat(
                [
                    DF(index=["------------- Learning 1 -------------"]),
                    WeightedFeatures1,
                    DF(index=["------------- Learning 2 -------------"]),
                    WeightedFeatures2,
                ]
            )
            self.BestFeatures[Labeling] = BestFeatures["mean"]

            # analyze decomposition
            if isDecompose:
                Components_mean = Components.mean(axis=0)
                Components_std = Components.std(axis=0)
                ExplainedVar_mean = DF(ExplainedVar.mean(axis=1)).T  # todo- check!
                ExplainedVar_mean.index = ["ExplainedVar_mean"]
                ExplainedVar_std = DF(ExplainedVar.std(axis=1)).T  # todo- check!
                ExplainedVar_std.index = ["ExplainedVar_std"]
                try:
                    self.LabelComponents[Labeling] = concat(
                        [
                            DF(index=["---components mean---"]),
                            Components_mean,
                            ExplainedVar_mean,
                            DF(index=["---components std over cross validation---"]),
                            Components_std,
                            ExplainedVar_std,
                        ]
                    )
                except AttributeError:
                    self.LabelComponents = dict.fromkeys(LabelingList)
                    self.LabelComponents[Labeling] = concat(
                        [
                            DF(index=["---components mean---"]),
                            Components_mean,
                            ExplainedVar_mean,
                            DF(index=["---components std over cross validation---"]),
                            Components_std,
                            ExplainedVar_std,
                        ]
                    )

                """print(Components_mean)
                print(ExplainedVar_mean)
                print(WeightedFeatures1)"""

            # BestFeaturesForLabel.analyze(ByLevel=0) #TODO change to regression coeff
            LabelFullResults = concat([DF(index=[Labeling]), scores])

            self.FullResults = concat([self.FullResults, LabelFullResults])
            self.ResultsDF = concat([self.ResultsDF, DF(scores[0], columns=[Labeling])], axis=1)
        # continue here!! to build pseudo inverse matrix from predicted to true - make sure columns + rows are set!

        # self.BestFeatures[Labeling]=BestFeaturesForLabel.WeightedMean

        # plt.savefig('C:\\Users\\taliat01\\Desktop\\TALIA\\Code-Python\\Results\\'+Labeling+'png')
        # NOTE(review): pandas.Panel was removed in pandas 1.0 — this section
        # only runs on pandas < 0.25 (matching the .ix / .minor_axis usage).
        testScores3 = pandas.Panel(items=range(len(X2.index)))  # for each cv score...
        FullSubjectsList = YpredictedOverAllLabels[0].columns
        # Drop slices that are entirely NaN along each of the Panel's three axes
        # (items = CV folds, major = labels, minor = subjects).
        YdroppNans = YpredictedOverAllLabels.dropna(axis=0, how="all")
        YdroppNans = YdroppNans.dropna(axis=1, how="all")
        YpredictedOverAllLabels = YdroppNans.dropna(axis=2, how="all")
        notNans_cv_ind = YpredictedOverAllLabels.items
        notNans_trainSubjects = YpredictedOverAllLabels.minor_axis
        notNans_LabelsList = YpredictedOverAllLabels.major_axis
        notNans_TrueLabels = TrueLabels.T[notNans_trainSubjects].loc[notNans_LabelsList]
        cv_ind = 0
        for train, test in cv:
            if cv_ind in notNans_cv_ind:
                print(test)
                # Restrict each fold to subjects that survived the NaN filtering above.
                train = list(set(FullSubjectsList[train]).intersection(set(notNans_trainSubjects)))
                test = list(set(FullSubjectsList[test]).intersection(set(notNans_trainSubjects)))
                if len(train) > 0 and len(test) > 0:
                    AllLabelsYTrainPredicted = YpredictedOverAllLabels[cv_ind][train]
                    AllLabelsYTrainPredicted = AllLabelsYTrainPredicted.fillna(0)
                    AllLabelsYTrainTrue = notNans_TrueLabels[train]
                    AllLabelsYTestPredicted = YpredictedOverAllLabels[cv_ind][test]
                    AllLabelsYTestTrue = notNans_TrueLabels[test]

                    # Least-squares map from true labels to predictions, via the
                    # Moore-Penrose pseudo-inverse of the true-label matrix.
                    pseudoInverse_AllLabelsYTrainTrue = DF(
                        np.linalg.pinv(AllLabelsYTrainTrue),
                        columns=AllLabelsYTrainTrue.index,
                        index=AllLabelsYTrainTrue.columns,
                    )
                    global AllLabelsTransformationMatrix
                    AllLabelsTransformationMatrix = DF(
                        AllLabelsYTrainPredicted.dot(pseudoInverse_AllLabelsYTrainTrue),
                        columns=pseudoInverse_AllLabelsYTrainTrue.columns,
                    )  # change to real code!!
                # NOTE(review): the two statements below execute even when the
                # `len(train) > 0 and len(test) > 0` guard above failed; in that case
                # AllLabelsYTrainTrue / AllLabelsYTrainPredicted hold values from a
                # previous fold (or are unbound on the first fold). Confirm whether
                # they belong inside the guard.
                TrainModel3 = lambda y: y.T.dot(AllLabelsTransformationMatrix)
                # NOTE(review): `testscores3` is undefined — the Panel created above is
                # named `testScores3` (capital S); this line raises NameError when reached.
                testscores3[cv_ind] = learningUtils.getTestScores(
                    AllLabelsYTrainTrue, AllLabelsYTrainPredicted, TrainModel3
                )
            cv_ind += 1

        # Replace remaining NaNs so the printed/saved result tables are fully numeric.
        self.ResultsDF = self.ResultsDF.fillna(0.0)

        ## Print and save results
        print("\n")
        print(self.ResultsDF)
        print("\n")
        D = self.Learningdetails
        # Build the output directory name from all learning settings.
        # NOTE(review): backslash-joined paths are Windows-only — os.path.join
        # would make this portable.
        savePath = (
            resultsPath
            + "\\"
            + D["Model"]
            + "_"
            + D["CrossVal"]
            + "_LabelBy"
            + D["LabelBy"]
            + "_Features"
            + D["FeatureMethod"]
            + "_FS"
            + FeatureSelection
            + "_Kernel"
            + D["Kernel"]
            + "_"
            + D["CrossValSubjects"]
            + "Subjects_PieceSize"
            + D["PieceLength"]
        )
        if isPerm:
            savePath = savePath + "_PERMStest"
        saveName = savePath + "\\" + str(n_features) + "_features"
        self.Learningdetails["saveDir"] = savePath
        # NOTE(review): `dir` shadows the builtin of the same name within this scope.
        dir = os.path.dirname(saveName)
        if not os.path.exists(dir):
            os.makedirs(dir)
        # Prompt interactively for any save flag the caller left as None.
        # NOTE(review): raw_input is Python 2 only; under Python 3 (which the
        # print() calls above suggest) these lines raise NameError — use input().
        if isSavePickle is None:
            isSavePickle = int(raw_input("Save Results to pickle? "))
        if isSaveCsv is None:
            isSaveCsv = int(raw_input("save Results to csv? "))
        if isSaveFig is None:
            isSaveFig = int(raw_input("save Results to figure? "))

        if isSavePickle:
            self.ResultsDF.to_pickle(saveName + ".pickle")
            self.BestFeatures.to_pickle(saveName + "_bestFeatures.pickle")

        if isSaveCsv:
            # Stack results, label details, learning details and feature analysis
            # into one frame, separated by banner-index rows, then write one CSV.
            DetailsDF = DF.from_dict(self.Learningdetails, orient="index")
            ResultsCSV = concat(
                [
                    self.ResultsDF,
                    DF(index=["-------Label Details-------"]),
                    self.N,
                    DF(index=["-------Learning Details-------"]),
                    DetailsDF,
                    DF(index=["-------Selected Features Analysis------"]),
                    self.BestFeatures,
                ]
            )
            ResultsCSV.to_csv(saveName + ".csv")

        if isSaveCsv or isSavePickle:
            print("successfully saved as:\n" + saveName)

        if isSaveFig:
            # Figure 1 = train scores, figure 2 = test scores (created earlier).
            plt.figure(1)
            plt.savefig(saveName + "Train.png")
            plt.figure(2)
            plt.savefig(saveName + "Test.png")
        plt.close()
        plt.close()