Example #1
def compute_fscore(data_set_df, user_info_df, label='gender', min_not_nan=-1):
    df_filtered, y_v = pc.get_filtered_x_y(data_set_df, user_info_df, label)
    feature_fs = DataFrame(np.zeros(len(df_filtered)), index=df_filtered.index, columns=['importance'])
    i = 0
    for index, values in df_filtered.iterrows():
        try:
            if min_not_nan < 0:
                f_score, p_val = f_classif(values.fillna(values.mean())[:, np.newaxis], y_v)
                feature_fs.loc[index] = f_score if f_score != np.inf and f_score != -np.inf else np.nan
            else:
                nan_removed = values.dropna()
                if len(nan_removed) < min_not_nan:
                    feature_fs.loc[index] = np.nan
                else:
                    f_score, p_val = f_classif(nan_removed[:, np.newaxis], y_v[nan_removed.index.astype(int)])
                    feature_fs.loc[index] = f_score if f_score != np.inf and f_score != -np.inf else np.nan
            if i % 10000 == 0 and i > 0:
                print "\t\t\t%s features are done" % i
            i += 1
            # print index, feature_fs.loc[index].values[0]
        except ValueError:
            # print "value error occurs during processing %r" % index
            continue
    feature_fs.sort_values('importance', ascending=False, inplace=True, na_position='last')
    return feature_fs
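
A quick note on the scikit-learn call used above: f_classif takes a 2-D sample-by-feature matrix plus the class labels and returns per-feature F-scores and p-values, which is why each row is reshaped with np.newaxis before the call. A minimal, self-contained sketch of that call on synthetic data (all names and values here are illustrative, not from the original project):

import numpy as np
from sklearn.feature_selection import f_classif

rng = np.random.RandomState(0)
feature = rng.rand(100)             # one feature observed for 100 users
labels = rng.randint(0, 2, 100)     # binary label, e.g. gender

# f_classif wants shape (n_samples, n_features), hence the reshape to a single column.
f_score, p_val = f_classif(feature[:, np.newaxis], labels)
print(f_score[0])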
Example #2
def compute_mics(data_set_df, user_info_df, label='gender', min_not_nan=-1):
    df_filtered, y_v = pc.get_filtered_x_y(data_set_df, user_info_df, label)
    feature_mics = DataFrame(np.zeros(len(df_filtered)), index=df_filtered.index, columns=['importance'])
    i = 0
    for index, values in df_filtered.iterrows():
        # if len(feature_mics) > 1000:
        #     break
        m = minepy.MINE()
        try:
            if min_not_nan < 0:
                m.compute_score(values, y_v)
                feature_mics.loc[index] = m.mic()
            else:
                nan_removed = values.dropna()
                if len(nan_removed) < min_not_nan:
                    feature_mics.loc[index] = np.nan
                else:
                    m.compute_score(nan_removed, y_v[nan_removed.index.astype(int)])
                    feature_mics.loc[index] = m.mic()
            # if len(feature_mics) > 1000:
            #     break
            # if float(i) % 10000 == 0 and i > 0:
            #     print "\t\t\t%s features are done" % i
            i += 1
            # print index, feature_mics.loc[index].values[0]
        except ValueError:
            # print "value error occurs during processing %r" % index
            continue
    feature_mics.sort_values('importance', ascending=False, inplace=True, na_position='last')
    return feature_mics
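
For reference, the minepy API used above is small: build a MINE estimator, pass two 1-D arrays to compute_score, then read the maximal information coefficient with mic(). A short sketch on synthetic data, assuming minepy is installed; the parameters shown are minepy's documented defaults:

import numpy as np
from minepy import MINE

rng = np.random.RandomState(0)
x = rng.rand(200)
y = (x + 0.1 * rng.rand(200) > 0.55).astype(int)   # labels loosely tied to x

m = MINE(alpha=0.6, c=15)   # minepy defaults
m.compute_score(x, y)
print(m.mic())              # MIC in [0, 1]; higher means stronger association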
Example #3
    def _decode_solutions(self, solutions):
        decoded_solutions = DataFrame(columns=["reactions", "knockouts", "fitness"])
        for index, solution in enumerate(solutions):
            reactions, knockouts = self._decoder(solution, flat=True)
            if len(reactions) > 0:
                decoded_solutions.loc[index] = [reactions, knockouts, solution.fitness]

        return decoded_solutions
Example #4
    def _decode_solutions(self, solutions):
        decoded_solutions = DataFrame(columns=["targets", "fitness"])
        index = 0
        for solution in solutions:
            combinations = self._decoder(solution.candidate, flat=True, decompose=True)
            for targets in combinations:
                if len(targets) > 0:
                    decoded_solutions.loc[index] = [tuple(targets), solution.fitness]
                    index += 1

        decoded_solutions.drop_duplicates(inplace=True, subset="targets")
        decoded_solutions.reset_index(inplace=True)

        return decoded_solutions
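
One detail worth noting in the example above: the target combinations are stored as tuples, which are hashable, so drop_duplicates(subset="targets") can deduplicate them directly. A tiny sketch of that behaviour with made-up target names:

from pandas import DataFrame

rows = [
    {"targets": ("geneA", "geneB"), "fitness": 0.9},   # hypothetical knockout combination
    {"targets": ("geneA", "geneB"), "fitness": 0.8},   # same targets, different fitness
    {"targets": ("geneC",), "fitness": 0.7},
]
df = DataFrame(rows)

# Tuples are hashable, so the column can be deduplicated directly.
df = df.drop_duplicates(subset="targets")
print(len(df))   # 2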
Example #5
def dojo_figures(options):
    """
    Create figures for a dojo table.
    Currently for all pseudos in the search space, the one with the best df per element is chosen.
    This should probably come from a dojotable eventually
    """
    pseudos = options.pseudos

    if False:
        """
        read the data from a data file instead of psp files
        """
        rows = []
        with open('data') as data_file:
            for line in data_file:
                line = line.rstrip('\n')
                #print(line)
                data = line.split(',')
                #print(data)
                data_dict = {'name': data[0],
                             'high_dfact_meV': float(data[1]),
                             'rell_high_dfact_meV': float(data[2]),
                             'high_dfactprime_meV': float(data[3])}
                if data[5] != 'nan':
                    data_dict['high_gbrv_bcc_a0_rel_err'] = float(data[5])
                    data_dict['high_gbrv_fcc_a0_rel_err'] = float(data[7])
                rows.append(data_dict)
    else:
        # Get data from dojoreport
        data_dojo, errors = pseudos.get_dojo_dataframe()

        if errors:
            cprint("get_dojo_dataframe returned %s errors" % len(errors), "red")
            if not options.verbose:
                print("Use --verbose for details.")
            else:
                for i, e in enumerate(errors):
                    print("[%s]" % i, e)

        # Add data that is not part of the dojo report.
        data_pseudo = DataFrame(columns=('nv', 'valence', 'rcmin', 'rcmax'))
        for index, p in data_dojo.iterrows():
            outfile = p.filepath.replace('.psp8', '.out')
            parser = OncvOutputParser(outfile)
            parser.scan()
            if not parser.run_completed:
                raise RuntimeError("[%s] Corrupted outfile" % outfile)

            data_pseudo.loc[index] = [parser.nv, parser.valence, parser.rc_min, parser.rc_max]

        data = concat([data_dojo, data_pseudo], axis=1)

    # Select "best" entries per element.
    rows, names = [], []
    sortby, ascending = "high_dfact_meV", True

    for name, group in data.groupby("symbol"):
        # Sort group and select best pseudo depending on sortby and ascending.
        select = group.sort_values(sortby, ascending=ascending).iloc[0]
        l = {k: getattr(select, k, None) for k in (
                                             'name', "symbol", 'Z',
                                             'high_b0_GPa', 'high_b1', 'high_v0', 'high_dfact_meV',
                                             'high_dfactprime_meV', 'high_ecut', 'high_gbrv_bcc_a0_rel_err',
                                             'high_gbrv_fcc_a0_rel_err', 'high_ecut',
                                             #'low_phonon', 'high_phonon',
                                             'low_ecut_hint', 'normal_ecut_hint', 'high_ecut_hint',
                                             'nv', 'valence', 'rcmin', 'rcmax')}
        for k, v in l.items():
            if v is None: cprint("[%s] Got None for %s" % (name, k), "red")

        names.append(name)
        rows.append(l)

    import matplotlib.pyplot as plt
    import matplotlib.cm as mpl_cm
    from pseudo_dojo.util.ptable_plotter import ElementDataPlotterRangefixer

    cmap = mpl_cm.cool
    color = 'black'
    cmap.set_under('w', 1.)

    # functions for plotting
    def rcmin(elt):
        """R_c min [Bohr]"""
        return elt['rcmin']

    def rcmax(elt):
        """R_c max [Bohr]"""
        return elt['rcmax']

    def ar(elt):
        """Atomic Radius [Bohr]"""
        return elt['atomic_radii'] * 0.018897161646320722

    def df(elt):
        """Delta Factor [meV / atom]"""
        return elt.get('high_dfact_meV', float('NaN'))

    def dfp(elt):
        """Delta Factor Prime"""
        return elt.get('high_dfactprime_meV', float('NaN'))

    def bcc(elt):
        """GBRV BCC [% relative error]"""
        try:
            return elt['high_gbrv_bcc_a0_rel_err'] if str(elt['high_gbrv_bcc_a0_rel_err']) != 'nan' else -99
        except KeyError:
            #print('bcc func fail: ', elt)
            return float('NaN')

    def fcc(elt):
        """GBRV FCC [% relative error]"""
        try:
            return elt['high_gbrv_fcc_a0_rel_err'] if str(elt['high_gbrv_fcc_a0_rel_err']) != 'nan' else -99
        except KeyError:
            #print('fcc func fail: ', elt)
            return float('NaN')

    def low_phon_with(elt):
        """Acoustic mode low_cut"""
        try:
            return elt['low_phonon'][0]
        except (KeyError, TypeError):
            #print('low_phon with func fail: ', elt)
            return float('NaN')

    def high_phon_with(elt):
        """AC mode [\mu eV]"""
        try:
            return elt['high_phonon'][0]*1000
        except (KeyError, TypeError):
            #print('high_phon with func fail: ', elt)
            return float('NaN')

    def high_ecut(elt):
        """ecut high [Ha]"""
        return elt.get('high_ecut_hint', float('NaN'))

    def low_ecut(elt):
        """ecut low [Ha]"""
        return elt.get('low_ecut_hint', float('NaN'))

    def normal_ecut(elt):
        """ecut normal [Ha]"""
        return elt.get('normal_ecut_hint', float('NaN'))

    els = []
    elsgbrv = []
    #elsphon = []
    rel_ers = []
    elements_data = {}

    for el in rows:
        symbol = el["symbol"]

        # Prepare data for deltafactor
        if el['high_dfact_meV'] is None:
            cprint('[%s] failed reading high_dfact_meV %s:' % (symbol, el['high_dfact_meV']), "magenta")
        else:
            if el['high_dfact_meV'] < 0:
                cprint('[%s] negative high_dfact_meV %s:' % (symbol, el['high_dfact_meV']), "red")
                print(symbol, el['high_dfact_meV'])
            #assert el['high_dfact_meV'] >= 0
            elements_data[symbol] = el
            els.append(symbol)

        # Prepare data for GBRV
        try:
            rel_ers.append(max(abs(el['high_gbrv_bcc_a0_rel_err']), abs(el['high_gbrv_fcc_a0_rel_err'])))
        except (TypeError, KeyError) as exc:
            cprint('[%s] failed reading high_gbrv:' % symbol, "magenta")
            if options.verbose: print(exc)

        try:
            if el['high_gbrv_bcc_a0_rel_err'] > -100 and el['high_gbrv_fcc_a0_rel_err'] > -100:
                elsgbrv.append(symbol)
        except (KeyError, TypeError) as exc:
            cprint('[%s] failed reading GBRV data for ' % symbol, "magenta")
            if options.verbose: print(exc)

        #try:
        #    if len(el['high_phonon']) > 2:
        #        elsphon.append(symbol)
        #except (KeyError, TypeError) as exc:
        #    cprint('[%s] failed reading high_phonon' % symbol, "magenta")
        #    if options.verbose: print(exc)

        #if symbol == "Br":
        #    print (elements_data[symbol])

    try:
        max_rel_err = 0.05 * int((max(rel_ers) / 0.05) + 1)
    except ValueError:
        max_rel_err = 0.20

    # plot the GBRV/DF results periodic table
    epd = ElementDataPlotterRangefixer(elements=els, data=elements_data)
    cm1 = mpl_cm.jet
    cm2 = mpl_cm.jet
    cm1.set_under('w', 1.0)
    epd.ptable(functions=[bcc, fcc, df], font={'color': color}, cmaps=[cm1, cm1, cm2],
               #clims=[[-max_rel_err, max_rel_err],[-max_rel_err, max_rel_err], [-20,20]])
               clims=[[-0.6,0.6],[-0.6, 0.6], [-4,4]])
    plt.show()

    # Test different color maps
    #for cm2 in [mpl_cm.PiYG_r, mpl_cm.PRGn_r,mpl_cm.RdYlGn_r]:
    #     epd = ElementDataPlotterRangefixer(elements=els, data=elements_data)
    #     epd.ptable(functions=[bcc,fcc,df], font={'color':color}, cmaps=[cm1,cm1,cm2],
    #           clims=[[-max_rel_err,max_rel_err],[-max_rel_err, max_rel_err], [0,3]])
    #     plt.show()
    #plt.savefig('gbrv.eps', format='eps')

    # plot the periodic table with deltafactor and deltafactor prime.
    epd = ElementDataPlotterRangefixer(elements=els, data=elements_data)
    epd.ptable(functions=[df, dfp], font={'color': color}, cmaps=cmap, clims=[[0, 6]])
    plt.show()
    #plt.savefig('df.eps', format='eps')

    # plot the GBVR results periodic table
    epd = ElementDataPlotterRangefixer(elements=elsgbrv, data=elements_data)
    epd.ptable(functions=[bcc, fcc], font={'color': color}, cmaps=mpl_cm.jet, clims=[[-max_rel_err, max_rel_err]])
    plt.show()
    #plt.savefig('gbrv.eps', format='eps')

    # plot the hints periodic table
    epd = ElementDataPlotterRangefixer(elements=els, data=elements_data)
    cm = mpl_cm.cool
    cm.set_under('w', 1.0)
    epd.ptable(functions=[low_ecut, high_ecut, normal_ecut], font={'color': color}, clims=[[6, 80]],  cmaps=cmap)
    plt.show()
    #plt.savefig('rc.eps', format='eps')

    # plot the radii periodic table
    epd = ElementDataPlotterRangefixer(elements=els, data=elements_data)
    epd.ptable(functions=[rcmin, rcmax, ar], font={'color': color}, clims=[[0, 4]], cmaps=cmap)
    plt.show()
    #plt.savefig('rc.eps', format='eps')

    # plot the acoustic mode periodic table
    #epd = ElementDataPlotterRangefixer(elements=elsphon, data=data)
    #cm = mpl_cm.winter
    #cm.set_under('orange', 1.0)
    #epd.ptable(functions=[high_phon_with], font={'color':color}, cmaps=cm, clims=[[-2, 0]])
    #plt.show()
    #plt.savefig('rc.eps', format='eps')

    return 0
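
The per-element selection above rests on a common pandas idiom: group by a key, sort each group, and keep its first row. A stripped-down sketch with made-up values (not dojo data):

from pandas import DataFrame

table = DataFrame({"symbol": ["Si", "Si", "O"],
                   "name": ["Si-a", "Si-b", "O-a"],
                   "high_dfact_meV": [1.2, 0.4, 2.0]})

best = {}
for symbol, group in table.groupby("symbol"):
    # Lowest deltafactor wins, mirroring sortby="high_dfact_meV", ascending=True.
    best[symbol] = group.sort_values("high_dfact_meV", ascending=True).iloc[0]

print(best["Si"]["name"])   # Si-b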
Example #6
def dojo_figures(options):
    """
    Create figures for a dojo table.
    Currently, for all pseudos in the search space, the one with the best df per element is chosen.
    This should probably come from a dojotable eventually.
    """
    pseudos = options.pseudos

    data_dojo, errors = pseudos.get_dojo_dataframe()

    # add data that is not part of the dojo report
    data_pseudo = DataFrame(columns=("nv", "valence", "rcmin", "rcmax"))
    for index, p in data_dojo.iterrows():
        out = p.name.replace("psp8", "out")
        outfile = p.symbol + "/" + out
        parser = OncvOutputParser(outfile)
        parser.scan()
        data_pseudo.loc[index] = [int(parser.nv), parser.valence, parser.rc_min, parser.rc_max]

    data = concat([data_dojo, data_pseudo], axis=1)

    """Select entries per element"""
    grouped = data.groupby("symbol")

    rows, names = [], []
    for name, group in grouped:

        if False:  # options.semicore
            select = group.sort_values("nv").iloc[-1]
        elif False:  # options.valence
            select = group.sort_values("nv").iloc[0]
        else:
            select = group.sort_values("high_dfact_meV").iloc[0]

        names.append(name)

        l = {
            k: getattr(select, k)
            for k in (
                "name",
                "Z",
                "high_b0_GPa",
                "high_b1",
                "high_v0",
                "high_dfact_meV",
                "high_dfactprime_meV",
                "high_ecut",
                "high_gbrv_bcc_a0_rel_err",
                "high_gbrv_fcc_a0_rel_err",
                "high_ecut",
                "low_phonon",
                "high_phonon",
                "low_ecut_hint",
                "normal_ecut_hint",
                "high_ecut_hint",
                "nv",
                "valence",
                "rcmin",
                "rcmax",
            )
        }
        rows.append(l)

    import matplotlib.pyplot as plt
    from ptplotter.plotter import ElementDataPlotter
    import matplotlib.cm as mpl_cm
    from matplotlib.collections import PatchCollection
    import numpy as np

    class ElementDataPlotterRangefixer(ElementDataPlotter):
        """
        Modified plotter that allows setting the clim for the plot.
        """

        def draw(self, colorbars=True, **kwargs):
            self.cbars = []
            clims = kwargs.get("clims", None)
            n = len(self.collections)
            if clims is None:
                clims = [None] * n
            elif len(clims) == 1:
                clims = [clims[0]] * n
            elif len(clims) == n:
                pass
            else:
                raise RuntimeError("incorrect number of clims provided in draw")
            for coll, cmap, label, clim in zip(self.collections, self.cmaps, self.cbar_labels, clims):
                # print(clim)
                pc = PatchCollection(coll, cmap=cmap)
                pc.set_clim(vmin=clim[0], vmax=clim[1])
                # print(pc.get_clim())
                pc.set_array(np.array([p.value for p in coll]))
                self._ax.add_collection(pc)

                if colorbars:
                    options = {"orientation": "horizontal", "pad": 0.05, "aspect": 60}

                    options.update(kwargs.get("colorbar-options", {}))
                    cbar = plt.colorbar(pc, **options)
                    cbar.set_label(label)
                    self.cbars.append(cbar)
            fontdict = kwargs.get("font", {"color": "white"})
            for s in self.squares:
                if not s.label:
                    continue
                x = s.x + s.dx / 2
                y = s.y + s.dy / 2
                self._ax.text(x, y, s.label, ha="center", va="center", fontdict=fontdict)

            qs_labels = [k.split("[")[0] for k in self.labels]

            if self.guide_square:
                self.guide_square.set_labels(qs_labels)
                pc = PatchCollection(self.guide_square.patches, match_original=True)
                self._ax.add_collection(pc)
            self._ax.autoscale_view()

    cmap = mpl_cm.cool
    color = "black"
    cmap.set_under("w", 1.0)

    # functions for plotting
    def rcmin(elt):
        """R_c min [Bohr]"""
        return elt["rcmin"]

    def rcmax(elt):
        """R_c max [Bohr]"""
        return elt["rcmax"]

    def ar(elt):
        """Atomic Radius [Bohr]"""
        return elt["atomic_radii"] * 0.018897161646320722

    def df(elt):
        """Delta Factor [meV / atom]"""
        try:
            return elt["high_dfact_meV"]
        except KeyError:
            return float("NaN")

    def dfp(elt):
        """Delta Factor Prime"""
        try:
            return elt["high_dfactprime_meV"]
        except KeyError:
            return float("NaN")

    def bcc(elt):
        """GBRV BCC [% relative error]"""
        try:
            v_bcc = elt["high_gbrv_bcc_a0_rel_err"] if str(elt["high_gbrv_bcc_a0_rel_err"]) != "nan" else -99
            #    print(v_bcc)
            return v_bcc
        except KeyError:
            print("bcc func fail: ", elt)
            return -99  # float('NaN')

    def fcc(elt):
        """GBRV FCC [% relative error]"""
        try:
            v_fcc = elt["high_gbrv_fcc_a0_rel_err"] if str(elt["high_gbrv_fcc_a0_rel_err"]) != "nan" else -99
            #    print(v_fcc)
            return v_fcc
        except KeyError:
            print("fcc func fail: ", elt)
            return -99  # float('NaN')

    def low_phon_with(elt):
        """Acoustic mode low_cut """
        try:
            return elt["low_phonon"][0]
        except (KeyError, TypeError):
            # print('low_phon with func fail: ', elt)
            return float("NaN")

    def high_phon_with(elt):
        """AC mode [\mu eV] """
        try:
            return elt["high_phonon"][0] * 1000
        except (KeyError, TypeError):
            # print('high_phon with func fail: ', elt)
            return float("NaN")

    def high_ecut(elt):
        """ecut high [Ha] """
        try:
            return elt["high_ecut_hint"]
        except (KeyError, TypeError):
            # print('high_ecut with func fail: ', elt)
            return float("NaN")

    def low_ecut(elt):
        """ecut low [Ha] """
        try:
            return elt["low_ecut_hint"]
        except (KeyError, TypeError):
            # print('low_ecut with func fail: ', elt)
            return float("NaN")

    def normal_ecut(elt):
        """ecut normal [Ha] """
        try:
            return elt["normal_ecut_hint"]
        except (KeyError, TypeError):
            # print('normal_ecut with func fail: ', elt)
            return float("NaN")

    els = []
    elsgbrv = []
    elsphon = []
    rel_ers = []
    elements_data = {}

    for el in rows:
        symbol = el["name"].split(".")[0].split("-")[0]
        rel_ers.append(max(abs(el["high_gbrv_bcc_a0_rel_err"]), abs(el["high_gbrv_fcc_a0_rel_err"])))
        if el["high_dfact_meV"] > 0:
            elements_data[symbol] = el
            els.append(symbol)
        else:
            print("failed reading df  :", symbol, el["high_dfact_meV"])
        if el["high_gbrv_bcc_a0_rel_err"] > -100 and el["high_gbrv_fcc_a0_rel_err"] > -100:
            elsgbrv.append(symbol)
        else:
            print("failed reading gbrv: ", symbol, el["high_gbrv_bcc_a0_rel_err"], el["high_gbrv_fcc_a0_rel_err"])
            # print(el)
        try:
            if len(el["high_phonon"]) > 2:
                elsphon.append(symbol)
        except (KeyError, TypeError):
            pass

    max_rel_err = max(rel_ers)

    # plot the GBRV/DF results periodic table
    epd = ElementDataPlotterRangefixer(elements=els, data=elements_data)
    cm1 = mpl_cm.jet
    cm2 = mpl_cm.cool
    cm1.set_under("w", 1.0)
    epd.ptable(
        functions=[bcc, fcc, df],
        font={"color": color},
        cmaps=[cm1, cm1, cm2],
        clims=[[-max_rel_err, max_rel_err], [-max_rel_err, max_rel_err], [0, 3]],
    )
    plt.show()
    # plt.savefig('gbrv.eps', format='eps')

    # plot the periodic table with df and dfp
    epd = ElementDataPlotterRangefixer(elements=els, data=elements_data)
    epd.ptable(functions=[df, dfp], font={"color": color}, cmaps=cmap, clims=[[0, 6]])
    plt.show()
    # plt.savefig('df.eps', format='eps')

    # plot the GBVR results periodic table
    epd = ElementDataPlotterRangefixer(elements=elsgbrv, data=elements_data)
    epd.ptable(functions=[bcc, fcc], font={"color": color}, cmaps=mpl_cm.jet, clims=[[-max_rel_err, max_rel_err]])
    plt.show()
    # plt.savefig('gbrv.eps', format='eps')

    # plot the hints periodic table
    epd = ElementDataPlotterRangefixer(elements=els, data=elements_data)
    cm = mpl_cm.cool
    cm.set_under("w", 1.0)
    epd.ptable(functions=[low_ecut, high_ecut, normal_ecut], font={"color": color}, clims=[[6, 80]], cmaps=cmap)
    plt.show()
    # plt.savefig('rc.eps', format='eps')

    # plot the radii periodic table
    epd = ElementDataPlotterRangefixer(elements=els, data=elements_data)
    epd.ptable(functions=[rcmin, rcmax, ar], font={"color": color}, clims=[[0, 4]], cmaps=cmap)
    plt.show()
    # plt.savefig('rc.eps', format='eps')

    # plot the acoustic mode periodic table
    epd = ElementDataPlotterRangefixer(elements=elsphon, data=data)
    cm = mpl_cm.winter
    cm.set_under("orange", 1.0)
    epd.ptable(functions=[high_phon_with], font={"color": color}, cmaps=cm, clims=[[-2, 0]])
    plt.show()
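
The ElementDataPlotterRangefixer.draw override above boils down to one matplotlib mechanism: rebuild each PatchCollection, pin its color limits with set_clim, and only then add it to the axes and attach a colorbar. A minimal sketch of that mechanism with synthetic rectangles and values (not dojo data):

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as mpl_cm
from matplotlib.patches import Rectangle
from matplotlib.collections import PatchCollection

fig, ax = plt.subplots()
patches = [Rectangle((i, 0), 0.9, 0.9) for i in range(5)]

pc = PatchCollection(patches, cmap=mpl_cm.cool)
pc.set_array(np.array([0.5, 2.0, 4.0, 6.0, 8.0]))  # value used to color each patch
pc.set_clim(vmin=0, vmax=6)                         # fixed clim: values above 6 saturate
ax.add_collection(pc)

plt.colorbar(pc, ax=ax, orientation="horizontal", pad=0.05, aspect=60)
ax.autoscale_view()
plt.show()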
Example #7
def main():
    print "Main Method"
    
    data_df = pd.read_csv('test.csv')
    df = DataFrame(data = data_df)
    
    no_of_frames = df.ix[df['FrameNumber'].idxmax()]
#    print no_of_frames.FrameNumber
    
    grouped = df.groupby('FrameNumber')
    grouped.groups
    
#    print grouped
#    print grouped.get_group(2)
    dfGroups = []
    
    keys = grouped.groups.keys()
    for i, val in enumerate(keys):
        dfGroups.append(grouped.get_group(val))
        dfGroup = grouped.get_group(val)
#        print dfGroup
    
#    print len(dfGroups[1])

#    points = []
    dfGroupPoints = []
    rows_list = []
    df1 = DataFrame(columns=('1', '2','3','4','5','6','7','8','9','10'))

    for index in range(len(dfGroups)):
#    for index in range(2):
        dfGroupPoints = []
        for row, frame in dfGroups[index].iterrows():
            dfGroupPoints.append([[frame.FeatureXAxis,frame.FeatureYAxis]])
        print "Index No= " + str(index) + "        Frame Number: " + str(frame.FrameNumber)
#        print dfGroupPoints
        point1 = dfGroupPoints[12]
        point2 = dfGroupPoints[18]
        point3 = dfGroupPoints[16]
        point4 = dfGroupPoints[19]
        point5 = dfGroupPoints[13]
        point6 = dfGroupPoints[14]
        point7 = dfGroupPoints[20]
        point8 = dfGroupPoints[17]
        point9 = dfGroupPoints[21]
        point10 = dfGroupPoints[15]
        point11 = dfGroupPoints[23]
        point12 = dfGroupPoints[35]
        point13 = dfGroupPoints[28]
        point14 = dfGroupPoints[36]
        point15 = dfGroupPoints[24]
        point16 = dfGroupPoints[25]
        point17 = dfGroupPoints[39]
        point18 = dfGroupPoints[32]
        point19 = dfGroupPoints[40]
        point20 = dfGroupPoints[26]
        point21 = dfGroupPoints[37]
        point22 = dfGroupPoints[27]
        point23 = dfGroupPoints[38]
        point24 = dfGroupPoints[41]
        point25 = dfGroupPoints[31]
        point26 = dfGroupPoints[42]
        point27 = dfGroupPoints[0]
        point28 = dfGroupPoints[1]
        point29 = dfGroupPoints[3]
        point30 = dfGroupPoints[4]
        point31 = dfGroupPoints[56]
        point32 = dfGroupPoints[54]
        point33 = dfGroupPoints[57]
        point34 = dfGroupPoints[59]
        point35 = dfGroupPoints[55]
        point36 = dfGroupPoints[58]
        point37 = dfGroupPoints[60]
        point38 = dfGroupPoints[61]
        point39 = dfGroupPoints[62]
        point40 = dfGroupPoints[65]
        point41 = dfGroupPoints[64]
        point42 = dfGroupPoints[63]
        
        x = [point12[0][0], point18[0][0], point16[0][0], point19[0][0], point13[0][0]]
        y = [point12[0][1], point18[0][1], point16[0][1], point19[0][1], point13[0][1]]

        x1 = [point14[0][0], point20[0][0], point17[0][0], point21[0][0], point15[0][0]]
        y1 = [point14[0][1], point20[0][1], point17[0][1], point21[0][1], point15[0][1]]

#        print "Frame Number = " + str(index) 
        
#        print "Grediant of eyebrows = " + str(feature1(x,y,x1,y1))

        lN = normalizeFactor(point27,point28)
        points1to20 = [point1,point2,point3,point4,point5,point6,point7,point8,point9,point10,point11,point12,point13,point14,point15,point16,point17,point18,point19,point20]

#        print "Normalizing Factor = " +  str(lN)
        
#        print "Distance between eyebrows and eyes = " + str(feature2(points1to20, lN))
    
#        print "Area betweens the eyes = " + str(areaBetweenEyes(point5,point6,point16,point17,lN))
#        print "Area of the eyes = " + str(areaOfEyes(point11,point12,point13,point14,point15,point23,point22,point21,point16,point17,point18,point19,point20,point26,point25,point24,lN))
#        print "Vertical to Horizonatal Ratio of Eyes = " + str(VTHRofEyes(point22,point13,point15,point11,point25,point18,point20,point16))
#        print "Area of the circumferance of the mouth = " + str(areaCircumOfMouth(point1,point2,point3,point4,point5,point6,point7,point8,lN))
#        print "Vertical to Horizontal ration of mouth = " + str(VTHRofCircMouth(point1,point2,point3,point4))
#        print "Vertical position of the corner of the mouth = " + str(VposOfMouth(point29,point30,point31,point32,point33,point34,point35,point36,point37,point38,point39,point40,point41,point42,lN))
    
        feature1 = gradEyes(x,y,x1,y1)
        feature2 = distEyesEyebrows(points1to20, lN)
        feature3 = areaBetweenEyes(point5,point6,point15,point16,lN)
        feature4 = areaOfEyes(point11,point12,point13,point14,point15,point23,point22,point21,point16,point17,point18,point19,point20,point26,point25,point24,lN)
        feature5 = vTHRofEyes(point22,point13,point15,point11,point25,point18,point20,point16)
        feature6 = areaCircumOfMouth(point29,point31,point32,point33,point30,point34,point35,point36,lN)
        feature7 = areaCircumOfMouth(point29,point37,point38,point39,point30,point40,point41,point42,lN)
        feature8 = vTHRofCircMouth(point35,point32,point30,point29)
        feature9 = vTHRofCircMouth(point41,point38,point30,point29)
        feature10 = vposOfMouth(point29,point30,point31,point32,point33,point34,point35,point36,point37,point38,point39,point40,point41,point42,lN)
#        feature11 = lN
        
        df1.loc[index] = [feature1,feature2,feature3,feature4,feature5,feature6,feature7,feature8,feature9,feature10]
        rows_list.append([feature1,feature2,feature3,feature4,feature5,feature6,feature7,feature8,feature9,feature10])
    
    rows_list_float = np.array(rows_list, dtype=float)

#    y_pred = KMeans(n_clusters=3).fit_predict(df1)
#    print y_pred

#    print y_pred.cluster_centers_ 

    a = 0
    x = 0
    y = 0
    z = 0
#    j = 0
#    for i in range(len(y_pred)):
#        if y_pred[i] == 0:
#            x += 1
#        if y_pred[i] == 1:
#            y += 1
##            print i
##            print df1.iloc[[i]]
#        if y_pred[i] == 2:
#            z += 1
##            print i
##            print df1.iloc[[i]]
#    print x
#    print y
#    print z
#    print df1

    min_max_scaler = preprocessing.MinMaxScaler()
    df1_normalize = min_max_scaler.fit_transform(df1)
    df1_new = df1_normalize[0:100]
    print df1_new
#    print df1_normalize
#    x_pred = min_max_scaler.fit_predict(df1)
    x_pred = KMeans(n_clusters=2).fit_predict(df1_new)
    
    for i in range(len(x_pred)):
        if x_pred[i] == 0:
            x += 1
#            print i
        if x_pred[i] == 1:
            y += 1
            print i
#            print df1.iloc[[i]]
        if x_pred[i] == 2:
            z += 1
        if x_pred[i] == 2:
            a += 1
#    print a
    print x
    print y
#    print z
    
    print "Finished"