Example no. 1
    def select_complex(self, out_table, clause, in_table="base"):
        in_df = self.table_store[in_table].df()  # type:pd.DataFrame
        c = self.table_store[in_table].centroids
        out_df = None
        if clause is None:
            ## no filter: pass the table through unchanged, centroids included
            self.table_store[out_table] = DataTable(df=in_df, centroids=c)
            return
        once = True
        ## clause is a disjunction of conjunctions: the terms inside each
        ## outer entry are AND-ed, and the outer entries are OR-ed together
        for i in clause:
            tmp_df = in_df
            for j in clause[i]:

                equation = clause[i][j]
                val = float(equation["val"])
                param = equation["param"]
                if equation["comp"] == "<":
                    tmp_df = tmp_df.loc[tmp_df[param] < val]
                elif equation["comp"] == "<=":
                    tmp_df = tmp_df.loc[tmp_df[param] <= val]
                elif equation["comp"] == "=":
                    tmp_df = tmp_df.loc[tmp_df[param] == val]
                elif equation["comp"] == "=>":
                    tmp_df = tmp_df.loc[tmp_df[param] >= val]
                elif equation["comp"] == ">":
                    tmp_df = tmp_df.loc[tmp_df[param] > val]
            if once:
                once = False
                out_df = tmp_df
            else:
                ## OR with earlier disjuncts: union of rows, matched by index
                out_df = tmp_df.combine_first(out_df)
        if once:  ## the clause dict held no entries: keep every row
            out_df = in_df
        self.table_store[out_table] = DataTable(df=out_df, centroids=c)
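
For reference, a hedged sketch of the clause shape this method appears to expect, next to the equivalent direct pandas filter; the columns and values are invented:

    import pandas as pd

    df = pd.DataFrame({"temp": [15.0, 25.0, 35.0], "hum": [40.0, 55.0, 70.0]})
    ## clause = {"c0": {"t0": {"param": "temp", "comp": ">",  "val": "20"},
    ##                  "t1": {"param": "temp", "comp": "<=", "val": "30"}},
    ##           "c1": {"t0": {"param": "hum",  "comp": "=",  "val": "55"}}}
    d0 = df.loc[(df["temp"] > 20) & (df["temp"] <= 30)]  ## terms AND-ed within a disjunct
    d1 = df.loc[df["hum"] == 55.0]
    out = d1.combine_first(d0)  ## disjuncts OR-ed: union of rows, matched by index
    print(out)  # only the middle row (25.0, 55.0) matches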
Example no. 2
    def aggregate(self, out_table, mode, limit=0, in_table="base"):
        df = self.table_store[in_table].df()  #type:pd.DataFrame
        c = self.table_store[in_table].centroids
        data = dict()  ## column name -> list of aggregated values
        ## aggregate all rows in one step if limit is 0
        if limit == 0:
            limit = len(df.index)
        ## iterate over columns
        for attr, values in df.items():  ## iteritems() was removed in pandas 2.0
            data[attr] = list()
            i = 0
            if attr == TIME_ATTR:
                ## keep one timestamp per interval: the first for MIN,
                ## the last for every other mode
                for value in values:
                    if i % limit == 0 and mode == "MIN":
                        data[attr].append(value)
                    i += 1
                    if i % limit == 0 and mode != "MIN":
                        data[attr].append(value)

            else:
                ## running statistics for the current interval
                ## (renamed from sum/min/max to avoid shadowing the builtins)
                total = 0
                cur_min = 0
                cur_max = 0
                ## count non-missing values in the interval
                valid_cnt = 0
                for value in values:
                    ## (re)initialize at an interval boundary, or when the
                    ## interval so far held only NaNs
                    if i % limit == 0 or np.isnan(cur_min):
                        cur_min = value
                        cur_max = value
                        total = 0
                        valid_cnt = 0
                    else:
                        if not np.isnan(value):
                            if value < cur_min:
                                cur_min = value
                            if value > cur_max:
                                cur_max = value
                    if not np.isnan(value):
                        valid_cnt += 1
                        total += value
                    i += 1
                    ## emit one result row per completed interval
                    if i % limit == 0:
                        insval = np.nan
                        if valid_cnt > 0:
                            if mode == "AVG":
                                insval = total / valid_cnt
                            elif mode == "MAX":
                                insval = cur_max
                            elif mode == "MIN":
                                insval = cur_min
                        data[attr].append(insval)

        self.table_store[out_table] = DataTable(df=pd.DataFrame(data),
                                                centroids=c)
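
For the numeric columns, the interval loop above behaves like grouping rows by index // limit; a runnable sketch on invented data (unlike the loop, groupby also aggregates a trailing partial block):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"temp": [1.0, np.nan, 3.0, 4.0]})
    limit = 2
    ## AVG: one row per block of `limit` rows, NaNs skipped as valid_cnt does
    out = df.groupby(np.arange(len(df)) // limit).mean()
    print(out)  # temp -> [1.0, 3.5]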
Example no. 3
    def project(self, out_table, attr1, attr2, in_table="base"):
        df = self.table_store[in_table].df()

        ## narrow data to the specified attributes
        df = df[[attr1, attr2]]

        ## store results in new table
        self.table_store[out_table] = DataTable(df=df)
Example no. 4
    def select(self, out_table, attr_name, a=None, b=None, in_table="base"):
        df = self.table_store[in_table].df()
        c = self.table_store[in_table].centroids

        ## select the according tuples for these boundaries
        ## for the given attribute
        if a is not None:
            df = df.loc[df[attr_name] >= a]
        if b is not None:
            df = df.loc[df[attr_name] <= b]

        ## store results in new table
        self.table_store[out_table] = DataTable(df=df, centroids=c)
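
A hedged usage sketch; "engine" stands in for an instance of this class, and the table and column names are invented:

    engine.select("mild", "temp", a=10, b=20)  ## keep rows with 10 <= temp <= 20
    engine.select("warm", "temp", a=20)        ## b=None leaves the upper bound open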
Example no. 5
    def cluster(self, out_table, k, params, in_table="base"):
        df = self.table_store[in_table].df()  # type:pd.DataFrame

        ## drop rows with a missing value in any clustering param;
        ## .copy() avoids a SettingWithCopyWarning on the _label assignment below
        df = df.dropna(subset=params).copy()

        kmeans = cluster.KMeans(n_clusters=k)  ## assumes: from sklearn import cluster
        kmeans.fit(df[params])
        centroids = kmeans.cluster_centers_

        df["_label"] = kmeans.labels_

        self.table_store[out_table] = DataTable(df=df, centroids=centroids)
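
A runnable sketch of the same KMeans pattern on invented data, assuming the cluster name refers to sklearn.cluster:

    import pandas as pd
    from sklearn import cluster

    df = pd.DataFrame({"x": [0.0, 0.1, 5.0, 5.1], "y": [0.0, 0.2, 5.0, 4.9]})
    kmeans = cluster.KMeans(n_clusters=2, n_init=10)
    kmeans.fit(df[["x", "y"]])
    df["_label"] = kmeans.labels_   ## one cluster id per row
    print(kmeans.cluster_centers_)  ## two centroids, near (0, 0) and (5, 5)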
Example no. 6
    def normalize(self, out_table, params=None, in_table="base"):
        if params is None:
            params = self.get_grain_columns()
        df = self.table_store[in_table].df()  # type:pd.DataFrame
        c = self.table_store[in_table].centroids
        other_params = [col for col in df.columns if col not in params]
        ## set aside the columns that are not being normalized
        out_df = df[other_params]

        ## scale the selected columns so each row sums to 1
        df = df[params]
        df = df.div(df.sum(axis=1), axis=0)

        ## rejoin the untouched columns with the normalized ones
        out_df = out_df.join(df)
        self.table_store[out_table] = DataTable(df=out_df, centroids=c)
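
A runnable sketch of the row-wise normalization step on invented data; each row of the selected columns is divided by its own sum:

    import pandas as pd

    df = pd.DataFrame({"a": [1.0, 2.0], "b": [3.0, 2.0]})
    norm = df.div(df.sum(axis=1), axis=0)
    print(norm)  # rows become [0.25, 0.75] and [0.5, 0.5]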
Example no. 7
    def newcols(self, out_table, mode, newcols, in_table="base"):
        df = self.table_store[in_table].df().copy()  # type:pd.DataFrame
        c = self.table_store[in_table].centroids

        if mode == "SUM":
            for new_colname, paramlist in newcols.items():
                df[new_colname] = df[paramlist].sum(axis=1)
        elif mode == "MEAN":
            for new_colname, paramlist in newcols.items():
                df[new_colname] = df[paramlist].mean(axis=1)
        elif mode == "MIN":
            for new_colname, paramlist in newcols.items():
                df[new_colname] = df[paramlist].min(axis=1)
        elif mode == "MAX":
            for new_colname, paramlist in newcols.items():
                df[new_colname] = df[paramlist].max(axis=1)

        self.table_store[out_table] = DataTable(df=df, centroids=c)
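
A hedged usage sketch of the expected newcols argument, a dict mapping each new column name to the source columns it reduces row-wise; "engine" and the column names are invented:

    engine.newcols("enriched", "MEAN", {"temp_mean": ["temp_a", "temp_b"]})
    engine.newcols("totals", "SUM", {"total": ["temp_a", "temp_b", "hum"]},
                   in_table="enriched")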
Example no. 8
    def aggregateTime(self, out_table, mode, minutes=60 * 24, in_table='base'):
        df = self.table_store[in_table].df()  #type:pd.DataFrame
        c = self.table_store[in_table].centroids

        freq = '{}Min'.format(minutes)
        ## pd.Grouper(freq=...) requires df to carry a DatetimeIndex; if it
        ## does not, restore the line below:
        #df.index = pd.DatetimeIndex(df[TIME_ATTR], copy=True)
        by = pd.Grouper(freq=freq)  ## pd.TimeGrouper was removed in pandas 1.0
        grouped = df.groupby(by)
        if mode == 'COUNT':
            out = grouped.count()
        elif mode == 'AVG':
            out = grouped.mean()
        elif mode == 'MIN':
            out = grouped.min()
        else:
            out = grouped.max()

        self.table_store[out_table] = DataTable(df=out, centroids=c)
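
A runnable sketch of the same frequency grouping on invented data; four 30-minute rows collapse into two hourly means:

    import pandas as pd

    idx = pd.date_range("2024-01-01", periods=4, freq="30min")
    df = pd.DataFrame({"temp": [1.0, 2.0, 3.0, 4.0]}, index=idx)
    out = df.groupby(pd.Grouper(freq="60min")).mean()
    print(out)  # temp -> [1.5, 3.5]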
Example no. 9
    def groupby2(self, out_table, attr, mode, in_table="base", bydate=False):
        df = self.table_store[in_table].df()  #type:pd.DataFrame
        c = self.table_store[in_table].centroids
        if bydate:
            ## group by calendar day: normalize() zeroes the time-of-day part
            by = df[attr].dt.normalize()
        else:
            by = df[attr]

        grouped = df.groupby(by)

        if mode == 'COUNT':
            out = grouped.count()
        elif mode == 'SUM':
            out = grouped.sum()
        else:
            out = grouped.max()

        ## store results in new table
        self.table_store[out_table] = DataTable(df=out, centroids=c)
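
A runnable sketch of the bydate path on invented data; dt.normalize() maps every timestamp to midnight, so rows group by calendar day:

    import pandas as pd

    ts = pd.to_datetime(["2024-01-01 08:00", "2024-01-01 17:30", "2024-01-02 09:15"])
    df = pd.DataFrame({"when": ts, "n": [1, 2, 3]})
    out = df.groupby(df["when"].dt.normalize()).count()
    print(out["n"])  # 2 events on 2024-01-01, 1 on 2024-01-02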
Example no. 10
    def link(self, out_table, in_table="base"):
        df = self.table_store[in_table].df()
        c = self.table_store[in_table].centroids
        ## store a reference to the same data under the new table name
        self.table_store[out_table] = DataTable(df=df, centroids=c)
Example no. 11
    def read_data(self, path):
        self.table_store["base"] = DataTable(path)
Example no. 12
    def select_ids(self, out_table, ids, in_table="base"):
        df = self.table_store[in_table].df()  # type:pd.DataFrame
        c = self.table_store[in_table].centroids
        ## keep only the rows whose index labels appear in ids
        df = df.loc[ids]
        self.table_store[out_table] = DataTable(df=df, centroids=c)
Example no. 13
    def store_df(self, df, name):
        self.table_store[name] = DataTable(df=df)
Example no. 14
def main(argv):
    # setup options from command line
    netType = 'ResNet'
    input_shape1 = 224
    input_shape2 = 224
    gaus = 0
    numberOfEpochs = 30
    hiddenUnits = 75
    patients_to_use = 'ALL'
    weights = ''
    folder = "D:\\deep learning dataset\\MS Fall Study"
    try:
        opts, args = getopt.getopt(argv, "?f:t:1:2:g:e:h:p:w:")
    except getopt.GetoptError:
        Help()
        return
    for opt, arg in opts:
        if opt == '-?':
            Help()
            return
        elif opt == '-f':
            folder = arg
        elif opt == '-w':
            weights = arg
        elif opt == '-t':
            netType = arg
        elif opt == '-1':
            input_shape1 = int(arg)
        elif opt == '-2':
            input_shape2 = int(arg)
        elif opt == '-g':
            gaus = float(arg)
        elif opt == '-e':
            numberOfEpochs = int(arg)
        elif opt == '-h':
            hiddenUnits = int(arg)
        elif opt == '-p':
            try:
                patients_to_use = int(arg)
            except ValueError:
                patients_to_use = arg

    if 'Plt' == netType:
        vis = Visualize(folder, patients_to_use, True)
        vis.run()
    elif 'Img' == netType:
        vis = Visualize(folder, patients_to_use, False)
        vis.run()
    elif 'Table' == netType:
        table = DataTable(folder)
        table.run()
    else:
        netTypeVal = GetType(netType)
        if NETTYPE_INVALID == netTypeVal:
            print('unknown type:', netType)
            return
        print(
            '##################################################################################'
        )
        print(
            '# {0:s} shape ({1:d}, {2:d}) epochs {3:d} gaus {4:f} hidden units {5:d}          #'
            .format(netType, input_shape1, input_shape2, numberOfEpochs, gaus,
                    hiddenUnits))
        print(
            '##################################################################################'
        )
        rgb = True
        twoD = False
        batchSize = 32
        input_shape = (input_shape1, input_shape2)
        activities_to_load = [
            "30s Chair Stand Test", "Tandem Balance Assessment",
            "Standing Balance Assessment", "Standing Balance Eyes Closed",
            "ADL: Normal Walking", "ADL: Normal Standing",
            "ADL: Normal Sitting", "ADL: Slouch sitting", "ADL: Lying on back",
            "ADL: Lying on left side", "ADL: Lying on right side"
        ]
        preLoader = MatrixPreLoader(dataset_directory=folder,
                                    patients_to_use=patients_to_use,
                                    activity_types=activities_to_load,
                                    print_loading_progress=True)
        num_features = preLoader.get_number_of_patients()
        if NETTYPE_VGGBN == netTypeVal:
            vgg = VGG16Imp()
            model = vgg.VGG16WithBN(input_shape=(input_shape1, input_shape2,
                                                 3),
                                    classes=num_features)
        elif NETTYPE_VGG16 == netTypeVal:
            model = VGG16(weights=None,
                          classes=num_features,
                          input_shape=(input_shape1, input_shape2, 3))
        elif NETTYPE_RESNET == netTypeVal:
            resnet = ResNetImp()
            model = resnet.ResNet((input_shape1, input_shape2, 3),
                                  num_features)
        elif NETTYPE_RESNETPT == netTypeVal or NETTYPE_RESNETPD == netTypeVal:
            resnet = ResNetImp()
            model = resnet.ResNetP(weights, (input_shape1, input_shape2, 3),
                                   num_features)
        elif NETTYPE_SIMPLE == netTypeVal:
            rgb = False
            twoD = True
            rnn = RNNImp(hiddenUnits)
            model = rnn.SimpleRNN(input_shape, num_features)
        elif NETTYPE_GRU == netTypeVal:
            rgb = False
            twoD = True
            rnn = RNNImp(hiddenUnits)
            model = rnn.GRU(input_shape, num_features)
        elif NETTYPE_LSTM == netTypeVal:
            rgb = False
            twoD = True
            rnn = RNNImp(hiddenUnits)
            model = rnn.LSTM(input_shape, num_features)
        else:
            return

        print("create_generators")
        training_gen, validation_gen = create_generators(
            preLoader, input_shape, rgb, twoD, gaus, batchSize)

        if NETTYPE_RESNETPD == netTypeVal:
            print("predict_with_generator")
            predict_with_generator(model, validation_gen, preLoader)
        else:
            print("train_model_with_generator")
            train_model_with_generator(model, training_gen, validation_gen,
                                       numberOfEpochs, netType)

            training_dist = training_gen.GetDistribution()
            validation_dist = validation_gen.GetDistribution()
            print('training distribution', training_dist)
            print('validation distribution', validation_dist)
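
Judging from the getopt string "?f:t:1:2:g:e:h:p:w:", a typical invocation would look like python <script>.py -t ResNet -1 224 -2 224 -e 30 -h 75 -f <data folder> (the script name is not part of this excerpt); -p accepts either a patient count or a label such as ALL, and -w points at pre-trained weights for the ResNetP variants.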