Ejemplo n.º 1
0
    def anchor(self, drifting_t, anchored_t):
        """Anchor `drifting_t` at `anchored_t`

        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

        o -- [ D ] -- o  ==>  o -- [ A ] -- o

        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

        Both arguments are first converted with `T`. The anchored time is
        added to the graph when it is missing, then the drifting time is
        merged into it through `self._merge`.

        Parameters
        ----------
        drifting_t :
            Drifting time to anchor. It must already be part of the graph.
        anchored_t :
            When to anchor `drifting_t`. It must be an anchored time.

        Raises
        ------
        AssertionError
            If `drifting_t` is not in the graph or is not drifting, or if
            `anchored_t` is not anchored.
        """

        floating = T(drifting_t)
        fixed = T(anchored_t)

        # preconditions: membership is checked before the drifting flag
        assert floating in self and floating.drifting
        assert fixed.anchored

        # the destination node has to exist before the merge
        if not (fixed in self):
            self.add_node(fixed)

        self._merge(floating, fixed)
Ejemplo n.º 2
0
 def _outfFieldsValue(self, field_names, outdir):
     """Write the values of `field_names` for every feature to a text file.

     The values are read from `self.dataPath` with an `arcpy.da.SearchCursor`
     and written to `<outdir>/Attribute_Table.txt`.

     Parameters
     ----------
     field_names : sequence of str
         Names of the fields to export (joined with tabs in the header).
     outdir : str
         Directory in which `Attribute_Table.txt` is written.

     Returns
     -------
     str
         Path of the written file.

     Notes
     -----
     `T` and `timer` are helpers defined elsewhere in this module; they are
     only used here to report the elapsed time.
     """
     print("Start collecting attributes' values from the table of " +
           self._featureName)
     stTime = T()
     num_of_PCA_vars = len(field_names)
     # Initial 2d array for PCA input, with all elements equal to 0
     # Rows = number of features,  Cols = fields used for PCA analysis
     AllData = [[0] * num_of_PCA_vars for _ in range(self.num_of_features)]
     # Get fields value from the feature dataset
     with arcpy.da.SearchCursor(self.dataPath,
                                field_names) as cursor:  #@UndefinedVariable
         for rowNum, row in enumerate(cursor):
             for colNum in range(num_of_PCA_vars):
                 AllData[rowNum][colNum] = row[colNum]
     outfPath = os.path.join(outdir, "Attribute_Table.txt")
     # -----output format-----
     # Number of features: XX
     # Fields: field1    field2    field3    field4    ...
     # feature1_field1_value    feature1_field2_value    feature1_field3_value    feature1_field4_value    ...
     # feature2_field1_value    feature2_field2_value    feature2_field3_value    feature2_field4_value    ...
     # ...
     outString = "Number of features: " + str(self.num_of_features) + "\n"
     outString += "Fields: " + '\t'.join(field_names) + '\n'
     outString += '\n'.join('\t'.join(str(x) for x in y) for y in AllData)
     # the context manager closes the file even when the write fails
     with open(outfPath, 'w') as outf:
         outf.write(outString)
     print("All required attributes' values are written to " + outfPath +
           ". Elapsed Time: " + timer(stTime, T()) + "\n")
     return outfPath
Ejemplo n.º 3
0
    def add_edge(self, t1, t2, key=None, attr_dict=None, **attrs):
        """Add annotation to the graph between times t1 and t2

        Both times are converted with `T`, then the edge is created by the
        parent class `add_edge`.

        Parameters
        ----------
        t1, t2: float, str or None
            Start and end times of the edge.
        key : optional
            Passed through to the parent class `add_edge`.
        attr_dict : dict, optional
            Passed through to the parent class `add_edge`.
        **attrs :
            Passed through to the parent class `add_edge`
            (presumably annotation_type=annotation_value pairs, as in the
            example below).

        Raises
        ------
        AssertionError
            If both times are anchored and `t1` comes after `t2`.

        Example
        -------
        >>> G = Transcription()
        >>> G.add_edge(T(1.), T(), speaker='John', speech='Hello world!')
        """
        origin, destination = T(t1), T(t2)

        # chronological order can only be checked between two anchored times
        both_anchored = origin.anchored and destination.anchored
        if both_anchored:
            assert origin <= destination

        parent = super(Transcription, self)
        parent.add_edge(origin,
                        destination,
                        key=key,
                        attr_dict=attr_dict,
                        **attrs)
Ejemplo n.º 4
0
def omm2osm(inputfile, outputfile):
    """Append the converted lines of `inputfile` to the OSM XML `outputfile`.

    When `outputfile` is missing or empty, an `<osm ...>` root element is
    written first. The trailing closing tag is then cut off, every input line
    is converted with `m2s` (defined elsewhere in this module) and appended,
    and the closing `</osm>` tag is written back.

    Parameters
    ----------
    inputfile : str
        Path of the text file to convert, read line by line.
    outputfile : str
        Path of the XML file to create or extend.
    """
    # `xml.etree.cElementTree` was removed in Python 3.9; `ElementTree`
    # provides the same API
    import xml.etree.ElementTree as ET  # for xml
    import os  # for detecting if file is empty
    from time import time as T  # for elapsed time

    # start timer
    start = T()

    # create output file if not exists (the probe handle is closed at once)
    try:
        with open(outputfile, 'r'):
            pass
    except FileNotFoundError:
        with open(outputfile, 'w') as created:
            created.write('')

    with open(outputfile, 'ab+') as output:
        # create xml root if not exists
        if os.path.getsize(outputfile) == 0:
            temproot = ET.Element('osm')
            temproot.attrib['version'] = '0.6'
            temproot.attrib['generator'] = 'omconvert 0.1'
            temproot.attrib['copyright'] = 'OpenStreetMap and contributors'
            temproot.attrib[
                'attribution'] = 'http://www.openstreetmap.org/copyright'
            temproot.attrib[
                'license'] = 'http://opendatacommons.org/licenses/odbl/1-0/'
            ET.ElementTree(temproot).write(outputfile,
                                           encoding='utf-8',
                                           method='xml',
                                           short_empty_elements=False)
            output.write(b'\n')
        # drop the last 7 bytes: '</osm>\n' for a fresh file, '\n</osm>' for
        # a file produced by a previous run
        output.seek(-7, 2)
        output.truncate()

        with open(inputfile, 'r') as source:
            for line in source:
                stringoutput = m2s(line)
                prefix = line.split('>')[0]
                if prefix == '0':
                    stringoutput = b'\n' + stringoutput + b'\n'
                elif prefix == '2':
                    stringoutput = stringoutput[:-1]
                output.write(stringoutput.replace(b'&#10;"', b'"'))

        output.write(b'\n</osm>')
    print('Elapsed Time: ' + str(T() - start) + 's')
Ejemplo n.º 5
0
    def pre_align(self, t1, t2, t):
        """Insert `t` as the common predecessor of `t1` and `t2`

        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

        p -- [ t1 ]       p         [ t1 ]
                            ⟍     ⟋
                     ==>     [ t ]
                            ⟋     ⟍
        p' -- [ t2 ]      p'        [ t2 ]

        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

        All incoming edges of `t1` and `t2` are removed, their former
        predecessors are connected to `t`, and `t` is connected to both
        `t1` and `t2`.

        Parameters
        ----------
        t1, t2 :
            Times whose incoming edges are re-routed through `t`.
        t :
            Time inserted in front of `t1` and `t2`.

        Raises
        ------
        AssertionError
            If an incoming edge of `t1` or `t2` carries data.
        """

        t1 = T(t1)
        t2 = T(t2)
        t = T(t)

        # materialize predecessors once: they are traversed several times
        # below and the graph is modified in between
        pred1 = list(self.predecessors(t1))
        pred2 = list(self.predecessors(t2))
        incoming = ((t1, pred1), (t2, pred2))

        # make sure incoming edges are empty
        # because they're going to be removed afterwards,
        # and we don't want to lose data
        for node, preds in incoming:
            for p in preds:
                for data in self[p][node].values():
                    assert not data

        # let's get started (remove all incoming edges)
        for node, preds in incoming:
            for p in preds:
                for key in list(self[p][node]):
                    self.remove_edge(p, node, key=key)

        for p in set(pred1) | set(pred2):
            self.add_edge(p, t)

        self.add_edge(t, t1)
        self.add_edge(t, t2)
Ejemplo n.º 6
0
    def post_align(self, t1, t2, t):
        """Insert `t` as the common successor of `t1` and `t2`

        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

        [ t1 ] -- s       [ t1 ]         s
                                ⟍     ⟋
                     ==>         [ t ]
                                ⟋     ⟍
        [ t2 ] -- s'      [ t2 ]        s'

        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

        All outgoing edges of `t1` and `t2` are removed, `t` is connected
        to their former successors, and both `t1` and `t2` are connected
        to `t`.

        Parameters
        ----------
        t1, t2 :
            Times whose outgoing edges are re-routed through `t`.
        t :
            Time inserted after `t1` and `t2`.

        Raises
        ------
        AssertionError
            If an outgoing edge of `t1` or `t2` carries data.
        """

        t1 = T(t1)
        t2 = T(t2)
        t = T(t)

        # materialize successors once: they are traversed several times
        # below and the graph is modified in between
        succ1 = list(self.successors(t1))
        succ2 = list(self.successors(t2))
        outgoing = ((t1, succ1), (t2, succ2))

        # make sure outgoing edges are empty
        # because they're going to be removed afterwards,
        # and we don't want to lose data
        for node, succs in outgoing:
            for s in succs:
                for data in self[node][s].values():
                    assert not data

        # let's get started (remove all outgoing edges)
        for node, succs in outgoing:
            for s in succs:
                for key in list(self[node][s]):
                    self.remove_edge(node, s, key=key)

        for s in set(succ1) | set(succ2):
            self.add_edge(t, s)

        self.add_edge(t1, t)
        self.add_edge(t2, t)
Ejemplo n.º 7
0
    def align(self, one_t, another_t):
        """
        Align two (potentially drifting) times
        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

        o -- [ F ] -- o      o          o
                               ⟍     ⟋
                        ==>     [ F ]
                               ⟋     ⟍
        o -- [ f ] -- o      o          o

        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

        Parameters
        ----------
        one_t, another_t
            Two times to be aligned. Both must already be in the graph.

        Raises
        ------
        AssertionError
            If one of the two times is not in the graph.
        ValueError
            If both times are anchored.

        Notes
        -----
        * `one_t` is merged into `another_t` whenever `one_t` is drifting,
          so if both are drifting the resulting graph will no longer
          contain `one_t`.
        * Otherwise `another_t` (drifting) is merged into `one_t`.
        * In case `another_t` is anchored, `align` is equivalent to `anchor`.

        """

        first = T(one_t)
        second = T(another_t)

        assert first in self
        assert second in self

        # the first drifting time found is merged into the other one
        if first.drifting:
            self._merge(first, second)
            return

        if second.drifting:
            self._merge(second, first)
            return

        # neither time is drifting: nothing can be moved
        raise ValueError('Cannot align two anchored times')
Ejemplo n.º 8
0
 def grow(self, num=0, layer=0):
     """Grow the current region layer by layer.

     Each iteration asks `_findNextFirstOrderNeighborLayer` for a new layer,
     adds the features returned by `_findWithinFeatures`, then updates
     `self.layers`, `self.totalSet`, `self.featureCount`,
     `self.currentLayer` and `self.increasedLayerNumberList`.

     Parameters
     ----------
     num : int, optional
         Feature-count target. When non-zero, `layer` is overridden with
         9999 and growth stops once `self.featureCount >= num`.
     layer : int, optional
         Index of the last layer to grow; only honoured when `num` is 0.
         When both are 0, `num` falls back to `self._featureAmount`.
     """
     # resolve the two stopping criteria
     if num == 0:
         if layer == 0:
             layer = 9999
             num = self._featureAmount
         else:
             num = sys.maxsize
     else:
         layer = 9999

     startMessage = ("\nStart Organic Growth from current layer " +
                     str(self.currentLayer) + " (Number of features: " +
                     str(self.featureCount) + ").")
     print(startMessage)
     stTime = T()
     for layerIndex in xrange(self.currentLayer + 1, layer + 1):
         # Look for all first order neighbors of current grown region
         baseLayerSet = self.layers[layerIndex - 1]
         # NOTE(review): the layer is compared with the integer 0; if layers
         # hold sets this branch is presumably never taken - confirm whether
         # a test on the layer index was intended.
         if baseLayerSet == 0:
             excludedNeighborSet = baseLayerSet
         else:
             excludedNeighborSet = baseLayerSet | self.layers[layerIndex - 2]
         newLayerSet = self._findNextFirstOrderNeighborLayer(
             baseLayerSet, excludedNeighborSet, self._adjDict)
         # Update current layer number and total included features
         self.currentLayer += 1
         self.totalSet = self.totalSet | newLayerSet
         # Features returned by _findWithinFeatures are added to both the
         # grown region and the new layer
         enclosedFeatures = self._findWithinFeatures(self.totalSet)
         if len(enclosedFeatures) > 0:
             self.totalSet = self.totalSet | enclosedFeatures
             newLayerSet = newLayerSet | enclosedFeatures
         # Add the newly found layer to the original region to grow it
         self.layers.append(newLayerSet)
         grownBy = len(newLayerSet)
         self.featureCount += grownBy
         self.increasedLayerNumberList.append(grownBy)
         if self.featureCount >= num:
             break
     endMessage = ("\nOrganic Growth analysis has completed. " +
                   "\nCurrent layer: " + str(self.currentLayer) +
                   "\nNumber of features: " + str(self.featureCount) +
                   "\nElapsed time: " + timer(stTime, T()))
     print(endMessage)
Ejemplo n.º 9
0
 def _outfSpatialContiguityMatrix(self, spatialContRel, outdir):
     """Write the neighbors of every feature to a text file.

     For each feature index `i`, a layer restricted to that feature is
     created and `arcpy.SelectLayerByLocation_management` selects features
     of the base layer with the `spatialContRel` relationship. The selected
     object ids (shifted by `self.ObjIDInitialValue`, the feature itself
     excluded) are written as one line of the output file.

     Parameters
     ----------
     spatialContRel : str
         Relationship passed to `SelectLayerByLocation_management`; it is
         also part of the output file name.
     outdir : str
         Directory of the output file.

     Returns
     -------
     str
         Path of `Spatial_Contiguity_<spatialContRel>.txt`.

     Notes
     -----
     `T` and `timer` are helpers defined elsewhere in this module; they are
     only used here to report elapsed times.
     """
     print("Start producing spatial contiguity matrix for " +
           self._featureName + ".\nTotally " + str(self.num_of_features) +
           " number of features.")
     stTimeEntireOp = T()
     baselyr = os.path.join(self._scratchFolder, "BaseLayer.lyr")
     arcpy.MakeFeatureLayer_management(self.dataPath, baselyr)
     # one entry per feature, joined once at the end
     outLines = []
     for i in range(self.num_of_features):
         if i % 100 == 0:
             # restart the timer of the current batch of 100 features
             stTime1Feature = T()
         selectedlyr = os.path.join(self._scratchFolder,
                                    "SelectedLayer_" + str(i) + ".lyr")
         arcpy.MakeFeatureLayer_management(
             self.dataPath, selectedlyr, '\"' + self.ObjIDField + '\" = ' +
             str(i + self.ObjIDInitialValue))
         arcpy.SelectLayerByLocation_management(baselyr, spatialContRel,
                                                selectedlyr)
         neighborFeatureList = []
         with arcpy.da.SearchCursor(
                 baselyr, ("OID@")) as cursor:  #@UndefinedVariable
             for row in cursor:
                 # row only contain 1 element, ObjectID
                 cKey = row[0] - self.ObjIDInitialValue
                 if i != cKey:
                     neighborFeatureList.append(cKey)
         outLines.append(
             str(i) + ": " + ', '.join(str(x)
                                       for x in neighborFeatureList) + '\n')
         if i % 100 == 99:
             print("100 features' neighbors are identified (total: " +
                   str(i + 1) + "). Elapsed Time: " +
                   timer(stTime1Feature, T()))
         arcpy.SelectLayerByAttribute_management(baselyr, "CLEAR_SELECTION")
     outfPath = os.path.join(
         outdir, "Spatial_Contiguity_" + spatialContRel + ".txt")
     # -----Output format------
     # 0 (feature id, defaultly starting from 0): neighbor1_id, neighbor2_id, ...
     # 1: neighbor1_id, neighbor2_id, ...
     # ...
     # the context manager closes the file even when the write fails
     with open(outfPath, 'w') as outf:
         outf.write(''.join(outLines))
     print("Spatial Contiguity matrix has been written into " + outfPath +
           ". Elapsed time: " + timer(stTimeEntireOp, T()) + "\n")
     return outfPath
Ejemplo n.º 10
0
def osc2omc(inputfile, outputfile):
    """Append the converted content of the XML `inputfile` to `outputfile`.

    The input is parsed incrementally. Each opening `create`, `modify` or
    `delete` element writes a marker line ('3: ', '4: ' or '5: '), and each
    completed `node`, `way` or `relation` element is written with `m2s`
    (defined elsewhere in this module).

    Parameters
    ----------
    inputfile : str
        Path of the XML file to read.
    outputfile : str
        Path of the text file, opened in append mode.
    """
    # `xml.etree.cElementTree` was removed in Python 3.9; `ElementTree`
    # provides the same API
    import xml.etree.ElementTree as ET  # xml processing
    from time import time as T  # timer

    # marker line written when a modification section starts
    action_markers = {'create': '3: \n', 'modify': '4: \n', 'delete': '5: \n'}
    primitives = ('node', 'way', 'relation')

    # start timer
    start = T()

    # the context manager closes the output even if parsing fails
    with open(outputfile, 'a') as output:
        # parse xml; the first event gives access to the root element
        context = iter(ET.iterparse(inputfile, events=("start", "end")))
        _, root = next(context)

        for event, elem in context:
            if event == "start" and elem.tag in action_markers:
                output.write(action_markers[elem.tag])
                root.clear()
            elif event == "end" and elem.tag in primitives:
                # NOTE(review): osm2omm converts elements with `s2m`, while
                # `m2s` is used here - confirm this is the intended helper.
                output.write(m2s(elem))
                # release already processed elements
                root.clear()

    print('Elapsed Time: ' + str(T() - start) + 's')
Ejemplo n.º 11
0
	def run(self):
		"""Process tasks from `self.task_queue` until a `None` task is received.

		Each task is called, timed with `T()` and acknowledged with
		`task_done()`. Its result goes to `self.result_queue_tr` when
		`task_type` is 'train' and to `self.result_queue_te` when it is
		'test'.

		Raises
		------
		ValueError
			If a task has any other `task_type`.
		"""
		worker = self.name
		while True:
			task = self.task_queue.get()
			if task is None:
				# Poison pill means shutdown
				print('%s: Exiting' % worker)
				self.task_queue.task_done()
				return
			started = T()
			outcome = task()
			elapsed = T() - started
			print('%s : %s : %s' % (worker, task, elapsed))
			# the task is acknowledged before its result is dispatched
			self.task_queue.task_done()
			kind = task.task_type
			if kind == 'train':
				self.result_queue_tr.put(outcome)
			elif kind == 'test':
				self.result_queue_te.put(outcome)
			else:
				raise ValueError('Incorrect task type: {}'.format(kind))
Ejemplo n.º 12
0
def osm2omm(inputfile, outputfile):
    """Append the converted content of the XML `inputfile` to `outputfile`.

    The input is parsed incrementally; each completed `node`, `way` or
    `relation` element is written with `s2m` (defined elsewhere in this
    module).

    Parameters
    ----------
    inputfile : str
        Path of the XML file to read.
    outputfile : str
        Path of the text file, opened in append mode.
    """
    # `xml.etree.cElementTree` was removed in Python 3.9; `ElementTree`
    # provides the same API
    import xml.etree.ElementTree as ET  # xml processing
    from time import time as T  # timer

    # start timer
    start = T()

    # the context manager closes the output even if parsing fails
    with open(outputfile, 'a') as output:
        # parse xml; the first event gives access to the root element
        context = iter(ET.iterparse(inputfile, events=("start", "end")))
        _, root = next(context)

        for event, elem in context:
            if event == "end" and elem.tag in ("node", "way", "relation"):
                output.write(s2m(elem))
                # release already processed elements
                root.clear()

    print('Elapsed time: ' + str(T() - start) + 's')
def parallel_video_cropping(self, n_cpus=4):
    """
    Crops the video into subclips in parallel with a pool of worker processes

    INPUTS
    ------
    self: an instance of the class VideoPipelineNew
    n_cpus: number of worker processes (default 4); raise it if the computer
        has more cpus

    IMPORTANT
    ---------
    The subclip length is read from self.subclip_length and the number of
    subclips is int(self.clip_duration / self.subclip_length). Each worker
    receives a (self, subclip_index) pair through crop_vid.
    """
    from time import time as T
    subclip_length = self.subclip_length
    n_subclips = int(self.clip_duration / subclip_length)
    func_in = [(self, index) for index in range(n_subclips)]
    p = Pool(n_cpus)
    try:
        s1 = T()
        p.map(crop_vid, func_in)
        s2 = T()
    finally:
        # release the worker processes, even when cropping fails
        p.close()
        p.join()
    print('parallel time %s' % (s2 - s1))
Ejemplo n.º 14
0
    def relabel_drifting_nodes(self, mapping=None):
        """Relabel drifting nodes

        Parameters
        ----------
        mapping : dict, optional
            A dictionary with the old labels as keys and new labels as values.
            When omitted, every node returned by `self.drifting()` is mapped
            to a new `T()`.

        Returns
        -------
        g : Transcription
            New annotation graph
        mapping : dict
            A dictionary with the new labels as keys and old labels as values.
            Can be used to get back to the version before relabelling.
        """

        if mapping is None:
            old2new = {n: T() for n in self.drifting()}
        else:
            # work on a copy so the caller's mapping is left untouched
            old2new = dict(mapping)

        # `items()` is available on both Python 2 and Python 3 dictionaries
        new2old = {new: old for old, new in old2new.items()}
        return nx.relabel_nodes(self, old2new, copy=True), new2old
Ejemplo n.º 15
0
    def timerange(self, t1, t2, inside=True, sort=None):
        """Infer edge timerange from graph structure

        a -- ... -- [ t1 ] -- A -- ... -- B -- [ t2 ] -- ... -- b

        ==> [a, b] (inside=False) or [A, B] (inside=True)

        Parameters
        ----------
        t1, t2 : anchored or drifting times
        inside : boolean, optional
            When True, a drifting `t1` is replaced by the closest anchored
            time after it and a drifting `t2` by the closest anchored time
            before it. When False, the search directions are swapped.
        sort : list, optional
            Temporal sort of the graph. Computed with `self.temporal_sort()`
            when not provided.

        Returns
        -------
        segment : Segment
        """

        t1 = T(t1)
        t2 = T(t2)

        # in case it is not provided, compute temporal sort
        if sort is None:
            sort = self.temporal_sort()

        def closest_anchored(t, forward, fallback):
            """Return `t` if anchored, else its closest anchored neighbor"""
            if t.anchored:
                return t
            # find time index in temporal sort
            index = sort.index(t)
            # `reversed(sort[:index])` is empty when `index` is 0, whereas
            # `sort[index - 1::-1]` would wrap around and scan the whole list
            if forward:
                candidates = sort[index + 1:]
            else:
                candidates = reversed(sort[:index])
            for candidate in candidates:
                if candidate.anchored:
                    return candidate
            # no anchored time found in that direction
            return fallback

        # start: search just after if inside is True, just before otherwise
        start = closest_anchored(t1,
                                 forward=inside,
                                 fallback=TEnd if inside else TStart)

        # end: search just before if inside is True, just after otherwise
        end = closest_anchored(t2,
                               forward=not inside,
                               fallback=TStart if inside else TEnd)

        # return a 'Segment'
        return Segment(start=start, end=end)
Ejemplo n.º 16
0
def omc2osc(inputfile, outputfile):
    """Append the converted lines of `inputfile` to the XML `outputfile`.

    When `outputfile` is missing or empty, an `<osmChange ...>` root element
    is written first. The trailing closing tag is then cut off and the input
    is processed line by line. A line containing '3: ', '4: ' or '5: ' opens
    a `<create>`, `<modify>` or `<delete>` section, after the previous one
    has been closed with `endlast`. Any other line is converted with `m2s`.
    Both helpers are defined elsewhere in this module. Finally the pending
    section and the root element are closed.

    Parameters
    ----------
    inputfile : str
        Path of the text file to convert.
    outputfile : str
        Path of the XML file to create or extend.
    """
    # `xml.etree.cElementTree` was removed in Python 3.9; `ElementTree`
    # provides the same API
    import xml.etree.ElementTree as ET  # xml process
    import os  # check if file is empty
    from time import time as T  # timer

    # (marker found in the line, action code, opening tag), tested in order
    actions = (
        ('3: ', 3, b'\n<create>\n'),
        ('4: ', 4, b'\n<modify>\n'),
        ('5: ', 5, b'\n<delete>\n'),
    )
    closing_tags = {3: b'</create>', 4: b'</modify>', 5: b'</delete>'}

    # start timer
    start = T()

    # the input is opened first so that a missing input fails before the
    # output is touched
    with open(inputfile, 'r') as source:
        # create file if not exists (the probe handle is closed at once)
        try:
            with open(outputfile, 'r'):
                pass
        except FileNotFoundError:
            with open(outputfile, 'w') as created:
                created.write('')

        with open(outputfile, 'ab+') as output:
            # create root for xml
            if os.stat(outputfile).st_size == 0:
                temproot = ET.Element('osmChange')
                temproot.attrib['version'] = '0.6'
                temproot.attrib['generator'] = 'omconvert 0.1'
                ET.ElementTree(temproot).write(outputfile,
                                               method='xml',
                                               short_empty_elements=False)
                output.write(b'\n')
            # drop the last 13 bytes: '</osmChange>\n' for a fresh file,
            # '\n</osmChange>' for a file produced by a previous run
            output.seek(-13, 2)
            output.truncate()

            # code of the section currently open ('' when there is none)
            last_action = ''
            for line in source:
                for marker, code, opening_tag in actions:
                    if marker in line:
                        # close the previous section, then open the new one
                        output.write(endlast(last_action).encode('utf-8'))
                        last_action = code
                        output.write(opening_tag)
                        break
                else:
                    stringoutput = m2s(line)
                    output.write(stringoutput.replace(b'&#10;"', b'"'))

            # close the pending section with its own tag
            if last_action in closing_tags:
                output.write(closing_tags[last_action])
            output.write(b'\n</osmChange>')
    print('Elapsed Time: ' + str(T() - start) + 's')
# Rescale the feature matrix in place.
# NOTE(review): `SS` is imported outside this excerpt (presumably a
# StandardScaler alias) - confirm.
X = SS().fit_transform(X)

# ## Train the Support Vector Classifier

# In[10]:

from sklearn.svm import SVC

# Hyperparameters
kernel = 'rbf'
C = 13
gamma = 0.325

# Time the training only: `start`/`end` bracket the call to `fit`.
# X_train, Y_train, X_test and Y_test are defined outside this excerpt.
from time import time as T
start = T()
model = SVC(kernel=kernel, C=C, gamma=gamma)
clf = model.fit(X_train, Y_train)
end = T()

# Predict on the test data and report the score returned by `clf.score`
pred = clf.predict(X_test)
mScore = clf.score(X_test, Y_test)
print(f'Score against Testing Data: {mScore * 100:.3f}%')
# time() returns seconds, hence the factor 1000 for milliseconds
print(f'Model took {(end-start)*1000:.3f}ms to train')

# ### Generate Classification Report

# In[11]:

from sklearn.metrics import classification_report as CR
Ejemplo n.º 18
0
    def crop(self, source, target=None):
        """Get minimum subgraph between source time and target time

        Parameters
        ----------
        source : Segment, float or str
            Start time. When a `Segment` is provided, its `start` and `end`
            are used as source and target and `target` is ignored.
        target : float or str, optional
            End time.

        Returns
        -------
        g : Transcription
            Sub-graph between source and target

        Raises
        ------
        ValueError
            If source or target is a drifting time that is not in the graph.

        Notes
        -----
        An anchored time missing from the graph is replaced by the closest
        anchored time of the graph (just before for source, just after for
        target). When there is none, including when the graph has no
        anchored time at all, the whole graph is used on that side.
        """

        if isinstance(source, Segment):
            source, target = source.start, source.end

        source = T(source)
        target = T(target)

        # sorted list of anchored times will be needed later
        # make sure it is computed only once
        if source.anchored or target.anchored:
            anchored = sorted(self.anchored())

        # ~~~ from_source = set of nodes reachable from source ~~~~~~~~~~~~~~~~

        if source in self:
            # source is in graph (drifting or anchored): then it is easy
            from_source = {source} | nx.algorithms.descendants(self, source)

        elif source.drifting:
            raise ValueError(
                'Drifting time %s is not in the transcription.' % source)

        elif not anchored or source < anchored[0]:
            # no anchored time before source
            from_source = set(self)  # take no risk!

        else:
            # find anchored time just before source
            before = [n for n in anchored if n <= source][-1]
            from_source = {before} | nx.algorithms.descendants(self, before)

        # ~~~ to_target = set of nodes from which target is reachable ~~~~~~~~~

        if target in self:
            # target is in graph (drifting or anchored): then it is easy
            to_target = {target} | nx.algorithms.ancestors(self, target)

        elif target.drifting:
            raise ValueError(
                'Drifting time %s is not in the transcription.' % target)

        elif not anchored or target > anchored[-1]:
            # no anchored time after target
            to_target = set(self)  # take no risk!

        else:
            # find anchored time just after target
            after = [n for n in anchored if n >= target][0]
            to_target = {after} | nx.algorithms.ancestors(self, after)

        # nodes both reachable from source and leading to target,
        # i.e. source, target and source-to-target paths
        nbunch = from_source & to_target

        return self.subgraph(nbunch)
Ejemplo n.º 19
0
                        default='unet',
                        help='checkpoint prefix')
    parser.add_argument('--out_dir',
                        type=str,
                        default='out',
                        help='dir to restore results')
    args = parser.parse_args()
    step = args.step

    sym, arg_params, aux_params = mx.model.load_checkpoint(
        args.prefix, args.restore_step)
    # print(sym.list_outputs())
    mod = mx.mod.Module(symbol=sym, label_names=None, context=mx.gpu())
    mod.bind(for_training=False,
             data_shapes=[('data', (1, 3, step, step))],
             label_shapes=mod._label_shapes)
    mod.set_params(arg_params, aux_params, allow_missing=True)

    Batch = namedtuple('Batch', ['data'])
    imgns = [i for i in os.listdir(args.test_path) if i[-4:] == '.jpg']

    dic = {}
    for img_name in imgns[:]:
        start = T()
        name = img_name[:-9]
        print('test {}'.format(img_name))
        filename = os.path.join(args.test_path, img_name)
        label = predict(filename, dic)
        imsave(os.path.join(args.out_dir, img_name[:-4] + '_class.png'), label)
#        print('save to {}.tif, using {}s'.format(img_name[:-4]+'_class.tif', T() - start))
Ejemplo n.º 20
0
 def from_json(cls, data):
     """Build an instance from its JSON-compatible representation.

     The node-link data stored under `PYANNOTE_JSON_TRANSCRIPTION` is turned
     back into a graph, every node is converted with `T`, and the result is
     passed to `cls` together with the graph-level attributes.
     """
     raw = node_link_graph(data[PYANNOTE_JSON_TRANSCRIPTION])
     # relabel every node with its `T` counterpart
     as_times = dict((node, T(node)) for node in raw)
     relabelled = nx.relabel_nodes(raw, as_times)
     return cls(graph=relabelled, **relabelled.graph)
Ejemplo n.º 21
0
# Load the feature table; `vehicle_list_price` is the regression target and
# every other column is used as a feature.
df = pd.read_csv('../data/featuressimple.csv')
X = df.drop('vehicle_list_price',axis =1)
y = df.vehicle_list_price

### Train/test split: 20% of the rows held out, fixed seed for reproducibility
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.2, random_state=42)

# For regressors that need standardization: the scaler is fitted on the
# training data only and then applied to the test data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# `reg_linear`, `reg_ridge` and the timer `T` are defined outside this excerpt

# Linear regression, trained on the unscaled features
print("-"*100)
print("Linear regression...")
start = T()
reg_linear.fit(X_train,y_train)
y_pred = reg_linear.predict(X_test)
print(f"R2: {r2_score(y_test,y_pred): .2f}")
print(f"RMSE: {np.sqrt(mean_squared_error(y_test,y_pred)):.2f}")
print(f"Finished in {T()- start: .2f} seconds")

# Ridge regression, trained on the standardized features
print("-"*100)
print("Ridge regression...")
start = T()
reg_ridge.fit(X_train_scaled,y_train)
y_pred = reg_ridge.predict(X_test_scaled)
print(f"R2: {r2_score(y_test,y_pred): .2f}")
print(f"RMSE: {np.sqrt(mean_squared_error(y_test,y_pred)):.2f}")
print(f"Finished in {T()- start: .2f} seconds")
        # Make it into a dataframe
    df = pd.concat(lunge_per_well, axis=1)

    # Make a file if its not there
    excel_root = os.path.join(root, name) + '\\lunge_excel_output.xlsx'

    if not os.path.exists(excel_root):
        writer = pd.ExcelWriter(excel_root)  # +'output.xlsx')
        df.to_excel(writer, name)
        writer.save()
        writer.close()

    # DO NOT OVER WRITE DATA, CHECK IF FILE IS THERE ALREADY!!!
    elif os.path.exists(excel_root):
        book = load_workbook(excel_root)
        writer = pd.ExcelWriter(excel_root, engine='openpyxl')
        writer.book = book
        df.to_excel(writer, sheet_name=name)
        writer.save()
        writer.close()


# Entry point: this runs only when the file is executed as a script, not when
# its functions are imported from another module
# (e.g. `from VideoPipelineFinal import parallel_video_cropping`).
if __name__ == '__main__':
    from time import time as T
    s = T()
    self = VideoPipelineNew()
    # `time` is only imported under the alias `T` here, so the elapsed time
    # is computed with `T()`.
    # NOTE(review): this is printed before the cropping and matlab steps run,
    # so it only covers the construction of the pipeline - confirm intended.
    print('Program took this long to run: ' + str(T() - s))
    parallel_video_cropping(self)
    self.matlab_stuff()