Example 1
 def make_temp_filenames(self, temp_dir=None):
     tth_filen = make_filename(self.img_filename, 'tth.cake.npy',
                               temp_dir=temp_dir)
     azi_filen = make_filename(self.img_filename, 'azi.cake.npy',
                               temp_dir=temp_dir)
     int_filen = make_filename(self.img_filename, 'int.cake.npy',
                               temp_dir=temp_dir)
     return tth_filen, azi_filen, int_filen
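A note on the helper itself: the examples on this page all call a module-level make_filename. As rough orientation only, here is a minimal sketch of what the variant used in Examples 1-3 plausibly does, inferred purely from the call sites; the names and behavior are assumptions, not the library's actual code.

import os

def make_filename(filename, ext, temp_dir=None, original=False):
    """Sketch only: swap the extension of `filename` for `ext`; if
    `temp_dir` is given, relocate the result into that directory."""
    root = filename if original else os.path.splitext(filename)[0]
    new_filen = root + '.' + ext
    if temp_dir is not None:
        new_filen = os.path.join(temp_dir, os.path.basename(new_filen))
    return new_filen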
Example 2
 def make_temp_filenames(self, temp_dir=None):
     if temp_dir is not None:
         os.makedirs(temp_dir, exist_ok=True)
     bgsub_filen = make_filename(self.fname, 'bgsub.chi',
                                 temp_dir=temp_dir)
     bg_filen = make_filename(self.fname, 'bg.chi',
                              temp_dir=temp_dir)
     return bgsub_filen, bg_filen
Example 3
 def make_temp_filenames(self, temp_dir=None):
     if temp_dir is not None:
         os.makedirs(temp_dir, exist_ok=True)
     tth_filen = make_filename(self.img_filename, 'tth.cake.npy',
                               temp_dir=temp_dir)
     azi_filen = make_filename(self.img_filename, 'azi.cake.npy',
                               temp_dir=temp_dir)
     int_filen = make_filename(self.img_filename, 'int.cake.npy',
                               temp_dir=temp_dir)
     return tth_filen, azi_filen, int_filen
Example 4
def download(media_url, folder, audio=True, video=True, subtitles=False, **kwargs):
    """ Download the streams for the media url.

    :param str media_url: the url.
    :param str folder: the local folder for the output.
    :param bool audio: download audio.
    :param bool video: download video.
    :param bool subtitles: download subtitles.
    :param kwargs: additional video properties (see `streams`).
    :return: a `Download` instance.
    """
    streams_dict = streams(media_url, **kwargs)  # [0] audio stream, [1] video stream
    md = metadata(media_url)
    fn = utils.make_filename(md['title'])
    if not video:
        if not streams_dict[0]:
            print('Sorry, audio only is not available for this stream.')
            return None
        streams_dict[1] = None
    if not audio:
        streams_dict[0] = None
    filepath = os.path.join(folder, fn)
    if subtitles:
        subs = _subtitles(media_url, lang=subtitles)
        if subs:
            with open(f'{filepath}.srt', 'w') as f:
                f.write(subs)
    d = Download(streams_dict, filepath, utils.merge_files)
    return d
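A hypothetical call matching the signature above (the URL, folder, and language code are placeholders, not taken from the original project):

    d = download('https://example.com/watch?v=abc123', '/tmp/media',
                 audio=True, video=False, subtitles='en')
    if d is not None:
        pass  # d is a Download instance for the selected audio-only stream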
Example 5
    def download(self, site, id):
        vid = self.channels[site]["videos"][id]
        fullid = site + "_" + vid["id"]
        filename = fullid + "_" + vid["title"][:25]
        filename = make_filename(filename)

        file = FileRef(creator=self, path=filename)

        #cmd = read_instructions(site)["download"]
        try:
            downloader = read_instructions(site)["downloader"]
            DLclass = downloaders.__dict__[downloader]
            #os.system(cmd.format(url=vid["url"],filename=filename))
            outputfile = DLclass().download(url=vid["url"],
                                            path=file.relative())
            #print(outputfile)
            newvid = Video(id=fullid,
                           title=vid["title"],
                           thumbnail=URIRef(vid["thumbnail"]),
                           creators=[self],
                           remote={site: id},
                           file=FileRef(creator=self, path=outputfile))
            self.videos[fullid] = newvid
            newvid.save()

        except:
            raise
Example 6
def output_data(centroids, times, num_nonzeros, video_src):
    """Writes select data into text file
    
    Saves centroid coordinates, traveled distance and elapsed time to file. 
    output_data is called in postprocessing. 
    
    Parameters
    -----------
    centroids : list
        list of centroids of tracked object sampled at times
    times : list
        List of sampling times. Sampling every step-th frame.
    num_nonzeros : list
        List of total number of nonzero pixel values in binary foreground mask 
        for each time step
        
    References
    -----------
    getcoords.find_scale, getcoords.projective_transform, set_review_flags
    """
    # Initialize variables that hold sums
    total_dist = 0
    dists = np.empty(len(centroids))
    rep_counter = 0
    max_rep_count = 0

    scale = np.load("src\\scaling_factor.npy")
    M = np.load("src\\projection_matrix.npy")

    fname_out = utils.make_filename(video_src, ".txt", parent="res")

    # Ensure the output directory exists
    os.makedirs("res", exist_ok=True)

    with open(fname_out, "w") as f:
        f.write("No.,cX,cY,time,dist\n")
        for i, cent in enumerate(centroids):
            # Skip point if we have lost track
            if np.any(cent[:2] == 0) or (i > 0 and
                                         np.any(centroids[i - 1][:2] == 0)):
                dists[i] = np.nan
                continue

            time = times[i]
            dist, cX, cY, cZ = compute_distance(centroids, cent, i, M, scale)

            f.write("{:d},{:0.2f},{:0.2f},{:0.2f},{:0.2f}\n".format(
                i, cX, cY, time, dist))
            total_dist += dist if not np.isnan(dist) else 0
            dists[i] = dist

        f.write("Total dist in mm: {:0.4f}\n".format(total_dist))
        f.write("Total time in sec: {:0.4f}\n".format(times[-1]))

        f = flag_file_review(f, dists, num_nonzeros, times)
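Why the np.isnan check above is the safe choice: an identity comparison against np.nan only matches the literal singleton object, not NaN values produced by computation. A quick self-contained demonstration:

import numpy as np

computed = np.float64('nan')   # a NaN produced at runtime
print(computed is np.nan)      # False -- the identity check misses it
print(np.isnan(computed))      # True  -- the value check catches it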
Example 7
    def run_mencoder(self, accepted=False):
        if self.mplayer_preview_pid:
            call(['kill', str(self.mplayer_preview_pid)])
            self.mplayer_preview_pid = 0

        parameters = self.get_params_from_gui()

        self.schedule_timer.stop()
        channel_text = parameters.get('channel_text')
        append_suffix = self.append_suffix.isChecked()
        play_while_recording = self.play_while_recording.isChecked()
        filename = utils.make_filename(str(self.outputfile.text()),
                                       channel_text,
                                       append_suffix=append_suffix)

        self.filename = filename

        if not accepted and os.path.exists(filename):
            dialog = FileExistsDialog(self)
            dialog.show()
        else:
            self.stopButton.setEnabled(True)
            self.runButton.setEnabled(False)

            pre_command = str(self.pre_command.text())
            if pre_command:
                cmds = [c for c in re.split(r"\s+", pre_command) if c]
                try:
                    call(cmds)
                except OSError:
                    self.error_dialog.showMessage("excecution of %s failed" %
                                                  (pre_command, ))

            cmd = utils.generate_command(parameters)
            env = os.environ.copy()
            if parameters.get('setenvvars'):
                for key, val in parameters.get('envvars').items():
                    env[key] = val
            try:
                self.mencoder_instance = Popen(cmd, env=env)
                self.mencoder_pid = self.mencoder_instance.pid
            except OSError:
                self.error_dialog.showMessage("excecution of %s failed" %
                                              " ".join(cmd))

            if self.mencoder_pid:
                self.status_label.setText(
                    self.tr('Recording... %1').arg(
                        utils.secs_to_str(self.time_running)))
                self.checker_timer.start(1000)
                self.scheduleButton.setEnabled(False)
                self.cancel_sheduleButton.setEnabled(False)
                if play_while_recording:
                    self.preview_file_timer.start(2000)
            else:
                self.stopButton.setEnabled(False)
                self.runButton.setEnabled(True)
Example 8
 def save_xls(self):
     """
     Export jlist to an Excel file
     """
     if not self.model.jcpds_exist():
         return
     temp_dir = get_temp_dir(self.model.get_base_ptn_filename())
     filen_xls_t = make_filename(self.model.get_base_ptn_filename(),
                                 'jlist.xls',
                                 temp_dir=temp_dir)
     filen_xls = dialog_savefile(self.widget, filen_xls_t)
     if str(filen_xls) == '':
         return
     xls_jlist(filen_xls, self.model.jcpds_lst,
               self.widget.doubleSpinBox_Pressure.value(),
               self.widget.doubleSpinBox_Temperature.value())
Example 9
 def save_to_xls(self):
     temp_dir = get_temp_dir(self.model.get_base_ptn_filename())
     filen_xls = make_filename(self.model.get_base_ptn_filename(),
                               'peakfit.xls', temp_dir=temp_dir)
     reply = QtWidgets.QMessageBox.question(
         self.widget, 'Question',
         'Do you want to save to the default filename, %s?' % filen_xls,
         QtWidgets.QMessageBox.Yes | QtWidgets.QMessageBox.No,
         QtWidgets.QMessageBox.Yes)
     if reply == QtWidgets.QMessageBox.No:
         filen_xls = QtWidgets.QFileDialog.getSaveFileName(
             self.widget, "Save an Excel File", filen_xls, "(*.xls)")
     else:
         if os.path.exists(filen_xls):
             reply = QtWidgets.QMessageBox.question(
                 self.widget, 'Question',
                 'The file already exists. Overwrite?',
                 QtWidgets.QMessageBox.Yes | QtWidgets.QMessageBox.No,
                 QtWidgets.QMessageBox.No)
             if reply == QtWidgets.QMessageBox.No:
                 return
     self.model.save_peak_fit_results_to_xls(filen_xls)
Example 10
    def _goto_dpp_next_file(self, move):

        filelist_chi = get_sorted_filelist(
            self.model.chi_path,
            sorted_by_name=self.widget.radioButton_SortbyNme.isChecked(),
            search_ext='*.chi')
        filelist_dpp = get_sorted_filelist(
            self.model.chi_path,
            sorted_by_name=self.widget.radioButton_SortbyNme.isChecked(),
            search_ext='*.dpp')

        idx_chi = find_from_filelist(
            filelist_chi,
            os.path.split(self.model.base_ptn.fname)[1])
        dpp_filen = make_filename(self.model.base_ptn.fname, 'dpp')
        idx_dpp = find_from_filelist(filelist_dpp, dpp_filen)

        if idx_chi == -1:
            QtWidgets.QMessageBox.warning(self.widget, "Warning",
                                          "Cannot find current chi file")
            return  # added newly

        # for radioButton_NavDPP
        if idx_dpp == -1:
            QtWidgets.QMessageBox.warning(
                self.widget, "Warning", "Cannot find current dpp file.\n" +
                "Manually save one for current chi file first.")
            return  # added newly

        step = self.widget.spinBox_FileStep.value()
        if move == 'next':
            idx_chi_new = idx_chi + step
        elif move == 'previous':
            idx_chi_new = idx_chi - step
        elif move == 'last':
            idx_chi_new = len(filelist_chi) - 1
            if idx_chi == idx_chi_new:
                QtWidgets.QMessageBox.warning(self.widget, "Warning",
                                              "It is already the last file.")
                return
        elif move == 'first':
            idx_chi_new = 0
            if idx_chi == idx_chi_new:
                QtWidgets.QMessageBox.warning(self.widget, "Warning",
                                              "It is already the first file.")
                return
        if idx_chi_new > len(filelist_chi) - 1:
            idx_chi_new = len(filelist_chi) - 1
            if idx_chi == idx_chi_new:
                QtWidgets.QMessageBox.warning(self.widget, "Warning",
                                              "It is already the last file.")
                return
        if idx_chi_new < 0:
            idx_chi_new = 0
            if idx_chi == idx_chi_new:
                QtWidgets.QMessageBox.warning(self.widget, "Warning",
                                              "It is already the first file.")
                return

        if self.widget.checkBox_SaveDPPMove.isChecked():
            self.session_ctrl.save_dpp(quiet=True)
        else:
            reply = QtWidgets.QMessageBox.question(
                self.widget, 'Message',
                'Do you want to save this to dpp before you move to the next?',
                QtWidgets.QMessageBox.Yes | QtWidgets.QMessageBox.No,
                QtWidgets.QMessageBox.Yes)
            if reply == QtWidgets.QMessageBox.Yes:
                self.session_ctrl.save_dpp()

        new_filename_chi = filelist_chi[idx_chi_new]
        new_filename_dpp = make_filename(new_filename_chi, 'dpp')
        idx = find_from_filelist(filelist_dpp, new_filename_dpp)

        if idx == -1:
            # no pre-existing dpp
            # check the checkbox for autogenerate
            if self.widget.checkBox_AutoGenDPP.isChecked():
                self.base_ptn_ctrl._load_a_new_pattern(new_filename_chi)
                self.session_ctrl.save_dpp(quiet=True)
                self.model.clear_section_list()
                self.plot_ctrl.update()
            else:
                QtWidgets.QMessageBox.warning(
                    self.widget, "Warning", "Cannot find pre-existing dpp.\n" +
                    "Consider Create with Move function.")
                return
        else:
            # pre-existing dpp
            # question if overwrite or not
            if (self.widget.checkBox_AutoGenDPP.isChecked() and
                    not self.widget.checkBox_AutogenMissing.isChecked()):
                reply = QtWidgets.QMessageBox.question(
                    self.widget, 'Message',
                    "The next pattern already has a dpp.\n" +
                    "If you want to overwrite the existing one based" +
                    " on the dpp of the last pattern, choose YES.\n" +
                    "If you want to keep and open the existing dpp, choose NO.",
                    QtWidgets.QMessageBox.Yes | QtWidgets.QMessageBox.No,
                    QtWidgets.QMessageBox.No)
                if reply == QtWidgets.QMessageBox.Yes:
                    self.base_ptn_ctrl._load_a_new_pattern(new_filename_chi)
                    self.session_ctrl.save_dpp(quiet=True)
                    self.model.clear_section_list()
                    self.plot_ctrl.update()
                else:
                    # load the existing dpp
                    success = self.session_ctrl._load_dpp(new_filename_dpp)
                    if success:
                        if self.model.exist_in_waterfall(
                                self.model.base_ptn.fname):
                            self.widget.pushButton_AddBasePtn.setChecked(True)
                        else:
                            self.widget.pushButton_AddBasePtn.setChecked(False)
                        if self.widget.checkBox_ShowCake.isChecked():
                            self.session_ctrl._load_cake_format_file()
                        self.plot_ctrl.update()
                    else:
                        QtWidgets.QMessageBox.warning(
                            self.widget, "Warning",
                            "DPP loading was not successful.")
                        return
            else:
                # simply open the next existing one
                success = self.session_ctrl._load_dpp(new_filename_dpp)
                if success:
                    if self.model.exist_in_waterfall(
                            self.model.base_ptn.fname):
                        self.widget.pushButton_AddBasePtn.setChecked(True)
                    else:
                        self.widget.pushButton_AddBasePtn.setChecked(False)
                    if self.widget.checkBox_ShowCake.isChecked():
                        self.session_ctrl._load_cake_format_file()
                    self.plot_ctrl.update()
                else:
                    QtWidgets.QMessageBox.warning(
                        self.widget, "Warning",
                        "DPP loading was not successful.")
                    return
        self.jcpdstable_ctrl.update()
        self.peakfit_table_ctrl.update_sections()
        self.peakfit_table_ctrl.update_peak_parameters()
        self.peakfit_table_ctrl.update_baseline_constraints()
        self.peakfit_table_ctrl.update_peak_constraints()
        return
Example 11
def get_connecting_nodes(diff_start_end_elr_dat, route_name=None, update=False, verbose=False):
    """
    Get data of connecting points for different ELRs.

    :param diff_start_end_elr_dat: data frame where StartELR != EndELR
    :type diff_start_end_elr_dat: pandas.DataFrame
    :param route_name: name of a Route; if ``None`` (default), all Routes
    :type route_name: str or None
    :param update: whether to check on update and proceed to update the package data,
        defaults to ``False``
    :type update: bool
    :param verbose: whether to print relevant information in console as the function runs,
        defaults to ``False``
    :type verbose: bool or int
    :return: data of connecting points for different ELRs
    :rtype: pandas.DataFrame

    **Test**::

        from mssqlserver.metex import view_metex_schedule8_incident_locations
        from models.prototype.furlong import get_connecting_nodes

        update = False
        verbose = True

        route_name = None
        diff_start_end_elr_dat = view_metex_schedule8_incident_locations(
            route_name=route_name, start_and_end_elr='diff', verbose=verbose)
        connecting_nodes = get_connecting_nodes(diff_start_end_elr_dat, route_name, update, verbose)
        print(connecting_nodes)

        route_name = 'Anglia'
        diff_start_end_elr_dat = view_metex_schedule8_incident_locations(
            route_name=route_name, start_and_end_elr='diff', verbose=verbose)
        connecting_nodes = get_connecting_nodes(diff_start_end_elr_dat, route_name, update, verbose)
        print(connecting_nodes)
    """

    filename = "connections-between-different-ELRs"
    pickle_filename = make_filename(filename, route_name)
    path_to_pickle = cdd_geodata(pickle_filename)

    if os.path.isfile(path_to_pickle) and not update:
        return load_pickle(path_to_pickle, verbose=verbose)

    else:
        try:
            pickle_filename_temp = make_filename(filename)
            path_to_pickle_temp = cdd_geodata(pickle_filename_temp)

            if os.path.isfile(path_to_pickle_temp) and not update:
                connecting_nodes_all = load_pickle(path_to_pickle_temp)
                connecting_nodes = get_subset(connecting_nodes_all, route_name)

            else:
                diff_elr_mileages = diff_start_end_elr_dat.drop_duplicates()

                em = ELRMileages()
                print("Searching for connecting ELRs ... ", end="") if verbose else ""
                mileage_file_dir = cdd_railway_codes("line data\\elrs-and-mileages\\mileages")

                # noinspection PyTypeChecker
                conn_mileages = diff_elr_mileages.apply(
                    lambda x: em.get_conn_mileages(x.StartELR, x.EndELR, update,
                                                   pickle_mileage_file=True,
                                                   data_dir=mileage_file_dir), axis=1)

                print("\nFinished.") if verbose else ""

                conn_mileages_data = pd.DataFrame(conn_mileages.to_list(), index=diff_elr_mileages.index,
                                                  columns=['StartELR_EndMileage', 'ConnELR',
                                                           'ConnELR_StartMileage',
                                                           'ConnELR_EndMileage', 'EndELR_StartMileage'])

                connecting_nodes = diff_elr_mileages.join(conn_mileages_data)
                connecting_nodes.set_index(['StartELR', 'StartMileage', 'EndELR', 'EndMileage'],
                                           inplace=True)

            save_pickle(connecting_nodes, path_to_pickle, verbose=verbose)

            return connecting_nodes

        except Exception as e:
            print("Failed to get \"{}\". {}.".format(os.path.splitext(pickle_filename)[0], e))
Example 12
def get_adjusted_mileages_same_start_end_elrs(route_name, weather_category, shift_yards_same_elr,
                                              update=False, verbose=False):
    """
    Get adjusted mileages for each incident location where StartELR == EndELR.

    :param route_name: name of a Route; if ``None``, all Routes
    :type route_name: str or None
    :param weather_category: weather category; if ``None``, all weather categories
    :type weather_category: str or None
    :param shift_yards_same_elr: yards by which the start/end mileage is shifted for adjustment,
        given that StartELR == EndELR
    :type shift_yards_same_elr: int or float
    :param update: whether to check on update and proceed to update the package data,
        defaults to ``False``
    :type update: bool
    :param verbose: whether to print relevant information in console as the function runs,
        defaults to ``False``
    :type verbose: bool or int
    :return: adjusted mileages for each incident location where StartELR == EndELR
    :rtype: pandas.DataFrame

    **Test**::

        from models.prototype.furlong import get_adjusted_mileages_same_start_end_elrs

        route_name           = None
        weather_category     = None
        shift_yards_same_elr = 220
        update               = True
        verbose              = True

        adj_mileages = get_adjusted_mileages_same_start_end_elrs(route_name, weather_category,
                                                                 shift_yards_same_elr, update, verbose)
        print(adj_mileages)
    """

    filename = "adjusted-mileages-same-start-end-ELRs"
    pickle_filename = make_filename(filename, route_name, weather_category, shift_yards_same_elr)
    path_to_pickle = cdd_geodata(pickle_filename)

    if os.path.isfile(path_to_pickle) and not update:
        adj_mileages = load_pickle(path_to_pickle)
        return adj_mileages

    else:
        try:
            # Get data of incident locations where the 'StartELR' and 'EndELR' are THE SAME
            incident_locations = metex.view_metex_schedule8_incident_locations(
                route_name, weather_category, start_and_end_elr='same', verbose=verbose)

            # Get furlong information as reference
            ref_furlongs = vegetation.view_nr_vegetation_furlong_data(verbose=verbose)

            # Calculate adjusted furlong locations for each incident (for vegetation conditions)
            # noinspection PyTypeChecker
            adjusted_mileages = incident_locations.apply(
                lambda x: adjust_incident_mileages(
                    ref_furlongs, x.StartELR, x.StartMileage_num, x.EndMileage_num,
                    shift_yards_same_elr),
                axis=1)

            # Get adjusted mileage data
            adj_mileages = pd.DataFrame(list(adjusted_mileages), index=incident_locations.index,
                                        columns=['StartMileage_Adj', 'EndMileage_Adj',
                                                 'StartMileage_num_Adj', 'EndMileage_num_Adj',
                                                 'Section_Length_Adj',  # yards
                                                 'Critical_FurlongIDs'])

            save_pickle(adj_mileages, path_to_pickle, verbose=verbose)

            return adj_mileages

        except Exception as e:
            print("Failed to get \"{}\". {}.".format(os.path.splitext(pickle_filename)[0], e))
Example 13
def get_adjusted_mileages_diff_start_end_elrs(route_name, weather_category, shift_yards_diff_elr,
                                              update=False, verbose=False):
    """
    Get adjusted mileages for each incident location where StartELR != EndELR.

    :param route_name: name of a Route; if ``None``, all Routes
    :type route_name: str or None
    :param weather_category: weather category; if ``None``, all weather categories
    :type weather_category: str or None
    :param shift_yards_diff_elr: yards by which the start/end mileage is shifted for adjustment,
        given that StartELR != EndELR
    :type shift_yards_diff_elr: int or float
    :param update: whether to check on update and proceed to update the package data,
        defaults to ``False``
    :type update: bool
    :param verbose: whether to print relevant information in console as the function runs,
        defaults to ``False``
    :type verbose: bool or int
    :return: adjusted mileages for each incident location where StartELR != EndELR
    :rtype: pandas.DataFrame

    **Test**::

        from models.prototype.furlong import get_adjusted_mileages_diff_start_end_elrs

        route_name           = None
        weather_category     = None
        shift_yards_diff_elr = 220
        update               = True
        verbose              = True

        adj_mileages = get_adjusted_mileages_diff_start_end_elrs(route_name, weather_category,
                                                                 shift_yards_diff_elr, update, verbose)
        print(adj_mileages)
    """

    filename = "adjusted-mileages-diff-start-end-ELRs"
    pickle_filename = make_filename(filename, route_name, weather_category, shift_yards_diff_elr)
    path_to_pickle = cdd_geodata(pickle_filename)

    if os.path.isfile(path_to_pickle) and not update:
        return load_pickle(path_to_pickle)

    else:
        try:
            # Get data for which the 'StartELR' and 'EndELR' are DIFFERENT
            incident_locations_diff_start_end_elr = metex.view_metex_schedule8_incident_locations(
                route_name, weather_category, start_and_end_elr='diff', verbose=verbose)
            # Get connecting points for different (ELRs, mileages)
            connecting_nodes = get_connecting_nodes(incident_locations_diff_start_end_elr,
                                                    route_name, update=False, verbose=False)

            # Find End Mileage and Start Mileage of StartELR and EndELR, respectively
            locations_conn = incident_locations_diff_start_end_elr.join(
                connecting_nodes.set_index(['StanoxSection'], append=True),
                on=list(connecting_nodes.index.names) + ['StanoxSection'], rsuffix='_conn').dropna()
            locations_conn.drop(columns=[x for x in locations_conn.columns if '_conn' in x],
                                inplace=True)
            # Remove the data records where connecting nodes are unknown
            locations_conn = locations_conn[~((locations_conn.StartELR_EndMileage == '') |
                                              (locations_conn.EndELR_StartMileage == ''))]
            # Convert str mileages to num
            num_conn_colnames = ['StartELR_EndMileage_num', 'EndELR_StartMileage_num',
                                 'ConnELR_StartMileage_num', 'ConnELR_EndMileage_num']
            str_conn_colnames = ['StartELR_EndMileage', 'EndELR_StartMileage',
                                 'ConnELR_StartMileage', 'ConnELR_EndMileage']
            locations_conn[num_conn_colnames] = locations_conn[str_conn_colnames].applymap(
                nr_mileage_str_to_num)

            # Get furlong information
            nr_furlong_data = vegetation.view_nr_vegetation_furlong_data(verbose=verbose)

            adjusted_conn_elr_mileages = locations_conn.apply(
                lambda x: adjust_incident_mileages(
                    nr_furlong_data, x.ConnELR, x.ConnELR_StartMileage_num, x.ConnELR_EndMileage_num, 0)
                if x.ConnELR != '' else tuple(['', '', np.nan, np.nan, 0.0, []]),
                axis=1)
            adjusted_conn_mileages = pd.DataFrame(adjusted_conn_elr_mileages.tolist(),
                                                  index=locations_conn.index,
                                                  columns=['Conn_StartMileage_Adj',
                                                           'ConnELR_EndMileage_Adj',
                                                           'Conn_StartMileage_num_Adj',
                                                           'ConnELR_EndMileage_num_Adj',
                                                           'ConnELR_Length_Adj',  # yards
                                                           'ConnELR_Critical_FurlongIDs'])

            # Processing Start locations
            adjusted_start_elr_mileages = locations_conn.apply(
                lambda x: adjust_incident_mileages(
                    nr_furlong_data, x.StartELR, x.StartMileage_num, x.StartELR_EndMileage_num,
                    shift_yards_diff_elr),
                axis=1)

            # Create a dataframe adjusted mileage data of the Start ELRs
            adjusted_start_mileages = pd.DataFrame(adjusted_start_elr_mileages.tolist(),
                                                   index=locations_conn.index,
                                                   columns=['StartMileage_Adj',
                                                            'StartELR_EndMileage_Adj',
                                                            'StartMileage_num_Adj',
                                                            'StartELR_EndMileage_num_Adj',
                                                            'StartELR_Length_Adj',  # yards
                                                            'StartELR_Critical_FurlongIDs'])

            # Processing End locations
            adjusted_end_elr_mileages = locations_conn.apply(
                lambda x: adjust_incident_mileages(nr_furlong_data, x.EndELR, x.EndELR_StartMileage_num,
                                                   x.EndMileage_num, shift_yards_diff_elr),
                axis=1)

            # Create a dataframe of adjusted mileage data of the EndELRs
            adjusted_end_mileages = pd.DataFrame(adjusted_end_elr_mileages.tolist(),
                                                 index=locations_conn.index,
                                                 columns=['EndELR_StartMileage_Adj', 'EndMileage_Adj',
                                                          'EndELR_StartMileage_num_Adj',
                                                          'EndMileage_num_Adj',
                                                          'EndELR_Length_Adj',  # yards
                                                          'EndELR_Critical_FurlongIDs'])

            # Combine 'adjusted_start_mileages' and 'adjusted_end_mileages'
            adj_mileages = adjusted_start_mileages.join(adjusted_conn_mileages).join(
                adjusted_end_mileages)

            adj_mileages.dropna(subset=['StartMileage_num_Adj', 'EndMileage_num_Adj'], inplace=True)

            adj_mileages['Section_Length_Adj'] = list(zip(
                adj_mileages.StartELR_Length_Adj, adj_mileages.ConnELR_Length_Adj,
                adj_mileages.EndELR_Length_Adj))

            adj_mileages['Critical_FurlongIDs'] = \
                adj_mileages.StartELR_Critical_FurlongIDs + \
                adj_mileages.EndELR_Critical_FurlongIDs + \
                adj_mileages.ConnELR_Critical_FurlongIDs
            adj_mileages.Critical_FurlongIDs = adj_mileages.Critical_FurlongIDs.map(
                lambda x: list(set(x)))

            save_pickle(adj_mileages, path_to_pickle, verbose=verbose)

            return adj_mileages

        except Exception as e:
            print("Failed to get \"{}\". {}.".format(os.path.splitext(pickle_filename)[0], e))
Example 14
def get_incident_location_furlongs(route_name=None, weather_category=None,
                                   shift_yards_same_elr=220, shift_yards_diff_elr=220,
                                   update=False, verbose=False):
    """
    Get data of furlongs for incident locations.

    :param route_name: name of a Route; if ``None`` (default), all available Routes
    :type route_name: str or None
    :param weather_category: weather category; if ``None`` (default), all available weather categories
    :type weather_category: str or None
    :param shift_yards_same_elr: yards by which the start/end mileage is shifted for adjustment,
        given that ``StartELR == EndELR``, defaults to ``220``
    :type shift_yards_same_elr: int or float
    :param shift_yards_diff_elr: yards by which the start/end mileage is shifted for adjustment,
        given that ``StartELR != EndELR``, defaults to ``220``
    :type shift_yards_diff_elr: int or float
    :param update: whether to check on update and proceed to update the package data,
        defaults to ``False``
    :type update: bool
    :param verbose: whether to print relevant information in console as the function runs,
        defaults to ``False``
    :type verbose: bool or int
    :return: data of furlongs for incident locations
    :rtype: pandas.DataFrame or None

    **Test**::

        >>> from coordinator.furlong import get_incident_location_furlongs

        >>> il_furlongs = get_incident_location_furlongs(update=True, verbose=True)

        >>> il_furlongs.tail()

        >>> il_furlongs = get_incident_location_furlongs(route_name='Anglia', update=True, verbose=True)

        >>> il_furlongs.tail()

    """

    filename = "incident-location-furlongs"
    pickle_filename = make_filename(filename, route_name, weather_category, shift_yards_same_elr,
                                    shift_yards_diff_elr)
    path_to_pickle = cdd_geodata(pickle_filename)

    if os.path.isfile(path_to_pickle) and not update:
        incident_location_furlongs = load_pickle(path_to_pickle)

    else:
        try:
            use_col_names = ['Section_Length_Adj', 'Critical_FurlongIDs']

            adjusted_mileages_same_start_end_elrs = get_adjusted_mileages_same_start_end_elrs(
                route_name, weather_category, shift_yards_same_elr, verbose=verbose)
            ilf_same = adjusted_mileages_same_start_end_elrs[use_col_names]

            adjusted_mileages_diff_start_end_elrs = get_adjusted_mileages_diff_start_end_elrs(
                route_name, weather_category, shift_yards_diff_elr, verbose=verbose)
            ilf_diff = adjusted_mileages_diff_start_end_elrs[use_col_names]

            furlongs_dat = pd.concat([ilf_same, ilf_diff])

            incident_locations = metex.view_metex_schedule8_incident_locations(
                route_name, weather_category, verbose=verbose)

            # Merge the above data sets
            incident_location_furlongs = incident_locations.join(furlongs_dat, how='right')
            incident_location_furlongs.drop(['StartMileage_num', 'EndMileage_num'], axis=1, inplace=True)
            incident_location_furlongs.index = range(len(incident_location_furlongs))

            save_pickle(incident_location_furlongs, path_to_pickle, verbose=verbose)

        except Exception as e:
            print("Failed to get \"{}\". {}.".format(os.path.splitext(pickle_filename)[0], e))
            incident_location_furlongs = None

    return incident_location_furlongs
Example 15
 def make_filename(self, extension, original=False):
     """
     :param extension: extension without a dot
     """
     return make_filename(self.base_ptn.fname, extension, original=original)
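For orientation, a hypothetical call (the filename is a placeholder, and the result assumes the extension-swapping behavior sketched under Example 1):

    # e.g. with self.base_ptn.fname == 'data/sample.chi':
    # self.make_filename('dpp')  ->  'data/sample.dpp'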
Example 16
def get_furlongs_diff_start_end_elrs(route_name=None, weather_category=None, shift_yards_diff_elr=220,
                                     update=False, verbose=False):
    """
    Get furlongs data for incident locations each identified by different start and end ELRs,
    i.e. StartELR != EndELR.

    :param route_name: name of a Route; if ``None`` (default), all Routes
    :type route_name: str or None
    :param weather_category: weather category; if ``None`` (default), all weather categories
    :type weather_category: str or None
    :param shift_yards_diff_elr: yards by which the start/end mileage is shifted for adjustment,
        given that StartELR != EndELR, defaults to ``220``
    :type shift_yards_diff_elr: int or float
    :param update: whether to check on update and proceed to update the package data,
        defaults to ``False``
    :type update: bool
    :param verbose: whether to print relevant information in console as the function runs,
        defaults to ``False``
    :type verbose: bool or int
    :return: furlongs data of incident locations each identified by different start and end ELRs
    :rtype: pandas.DataFrame

    **Test**::

        from models.prototype.furlong import get_furlongs_diff_start_end_elrs

        route_name           = None
        weather_category     = None
        shift_yards_diff_elr = 220
        update               = True
        verbose              = True

        furlongs_diff_start_end_elr = get_furlongs_diff_start_end_elrs(
            route_name, weather_category, shift_yards_diff_elr, update, verbose)
        print(furlongs_diff_start_end_elr)
    """

    filename = "furlongs-diff-start-end-ELRs"
    pickle_filename = make_filename(filename, route_name, weather_category, shift_yards_diff_elr)
    path_to_pickle = cdd_geodata(pickle_filename)

    if os.path.isfile(path_to_pickle) and not update:
        furlongs_diff_start_end_elr = load_pickle(path_to_pickle)
        return furlongs_diff_start_end_elr

    else:
        adj_mileages = get_adjusted_mileages_diff_start_end_elrs(route_name, weather_category,
                                                                 shift_yards_diff_elr,
                                                                 verbose=verbose)

        try:
            # Get furlong information
            nr_furlong_data = vegetation.view_nr_vegetation_furlong_data(verbose=verbose)
            # Form a list containing all the furlong IDs
            furlong_ids = list(set(itertools.chain(*adj_mileages.Critical_FurlongIDs)))

            # Select critical (i.e. incident) furlongs
            furlongs_diff_start_end_elr = nr_furlong_data.loc[furlong_ids]

            # Save 'furlongs_diff_start_end_elr'
            save_pickle(furlongs_diff_start_end_elr, path_to_pickle, verbose=verbose)

            return furlongs_diff_start_end_elr

        except Exception as e:
            print("Failed to get \"{}\". {}.".format(os.path.splitext(pickle_filename)[0], e))
Example 17
    def perform_ucfit(self):
        # get jcpds data in df.  use display to choose data points
        if self.model.section_lst == []:
            QtWidgets.QMessageBox.warning(
                self.widget, "Warning",
                "No peak fitting result exist for this file.")
            return
        if self.phase is None:
            QtWidgets.QMessageBox.warning(
                self.widget, "Warning", "No phase has been chosen for fitting")
            return
        data_by_phase_df = self._get_all_peakfit_results_df()
        data_to_fit_df = data_by_phase_df[self.phase].loc[data_by_phase_df[
            self.phase]['display'] == True]
        # number of data point check
        n_data_points = len(data_to_fit_df.index)
        if n_data_points < 2:
            QtWidgets.QMessageBox.warning(self.widget, "Warning",
                                          "You need at least 2 data points.")
            return
        # perform ucfit
        text_output = self.phase + '\n\n'
        text_output += 'Fitted unit cell parameters \n'
        text_output += 'Crystal system = ' + \
            self.widget.comboBox_Symmetry.currentText() + '\n'
        wavelength = self.model.get_base_ptn_wavelength()
        if self.widget.comboBox_Symmetry.currentText() == 'cubic':
            a, s_a, v, s_v, res_lin, res_nlin = fit_cubic_cell(data_to_fit_df,
                                                               wavelength,
                                                               verbose=False)
            cell_params = [a, a, a]
            text_output += "a = {0:.5f} +/- {1:.5f} \n".format(a, s_a)
            text_output += "V = {0:.5f} +/- {1:.5f} \n\n".format(v, s_v)
        elif self.widget.comboBox_Symmetry.currentText() == 'tetragonal':
            if n_data_points < 3:
                QtWidgets.QMessageBox.warning(
                    self.widget, "Warning",
                    "You need at least 3 data points for tetragonal.")
                return
            a, s_a, c, s_c, v, s_v, res_lin, res_nlin = \
                fit_tetragonal_cell(data_to_fit_df, wavelength,
                                    verbose=False)
            cell_params = [a, a, c]
            text_output += "a = {0:.5f} +/- {1:.5f} \n".format(a, s_a)
            text_output += "c = {0:.5f} +/- {1:.5f} \n".format(c, s_c)
            text_output += "V = {0:.5f} +/- {1:.5f} \n\n".format(v, s_v)
        elif self.widget.comboBox_Symmetry.currentText() == 'hexagonal':
            if n_data_points < 3:
                QtWidgets.QMessageBox.warning(
                    self.widget, "Warning",
                    "You need at least 3 data points for hexagonal.")
                return
            a, s_a, c, s_c, v, s_v, res_lin, res_nlin = \
                fit_hexagonal_cell(data_to_fit_df, wavelength,
                                   verbose=False)
            cell_params = [a, a, c]
            text_output += "a = {0:.5f} +/- {1:.5f} \n".format(a, s_a)
            text_output += "c = {0:.5f} +/- {1:.5f} \n".format(c, s_c)
            text_output += "V = {0:.5f} +/- {1:.5f} \n\n".format(v, s_v)
        elif self.widget.comboBox_Symmetry.currentText() == 'orthorhombic':
            if n_data_points < 4:
                QtWidgets.QMessageBox.warning(
                    self.widget, "Warning",
                    "You need at least 4 data points for orthorhombic.")
                return
            a, s_a, b, s_b, c, s_c, v, s_v, res_lin, res_nlin = \
                fit_orthorhombic_cell(data_to_fit_df, wavelength,
                                      verbose=False)
            cell_params = [a, b, c]
            text_output += "a = {0:.5f} +/- {1:.5f} \n".format(a, s_a)
            text_output += "b = {0:.5f} +/- {1:.5f} \n".format(b, s_b)
            text_output += "c = {0:.5f} +/- {1:.5f} \n".format(c, s_c)
            text_output += "V = {0:.5f} +/- {1:.5f} \n\n".format(v, s_v)
        else:
            # Defensive guard: without a recognized symmetry, res_lin/res_nlin
            # below would be undefined.
            QtWidgets.QMessageBox.warning(self.widget, "Warning",
                                          "Unsupported crystal system.")
            return
        # output results
        output_df = make_output_table(res_lin, res_nlin, data_to_fit_df)
        text_output += 'Output table\n'
        text_output += output_df[[
            'h', 'k', 'l', 'twoth', 'dsp', 'twoth residue'
        ]].to_string()
        text_output += '\n\nHat: influence on the fit result.\n'
        text_output += '     1 ~ large influence, 0 ~ no influence.\n'
        text_output += output_df[['h', 'k', 'l', 'twoth', 'dsp',
                                  'hat']].to_string()
        text_output += '\n\nRstudent: how much the parameter would change' + \
            ' if deleted.\n'
        text_output += output_df[['h', 'k', 'l', 'twoth', 'dsp',
                                  'Rstudent']].to_string()
        text_output += '\n\ndfFits: deletion diagnostic giving' + \
            ' the change in\n'
        text_output += '        the predicted value twotheta.\n'
        text_output += '        upon deletion of the data point as a ' + \
            'multiple of\n'
        text_output += '        the standard deviation for 1/d-spacing^2.\n'
        text_output += output_df[['h', 'k', 'l', 'twoth', 'dsp',
                                  'dfFits']].to_string()
        text_output += '\n\ndfBetas: normalized residual\n'
        text_output += output_df[['h', 'k', 'l', 'twoth', 'dsp',
                                  'dfBetas']].to_string()
        text_output += '\n\nNon-linear fit statistics \n'
        text_output += lmfit.fit_report(res_nlin)

        self.widget.plainTextEdit_UCFitOutput.setPlainText(text_output)

        # save jcpds and save output file automatically.
        # ask for filename.  at the moment, simply overwrite
        temp_dir = get_temp_dir(self.model.get_base_ptn_filename())
        ext = "ucfit.jcpds"
        filen_t = make_filename(self.template_jcpds.file,
                                ext,
                                temp_dir=temp_dir)
        filen_j = dialog_savefile(self.widget, filen_t)
        if str(filen_j) == '':
            return
        self._write_to_jcpds(filen_j, cell_params)

        # write to a textfile
        ext = "ucfit.output"
        filen_t = make_filename(self.template_jcpds.file,
                                ext,
                                temp_dir=temp_dir)
        filen_o = dialog_savefile(self.widget, filen_t)
        if str(filen_o) == '':
            return

        with open(filen_o, "w") as f:
            f.write(text_output)
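For context on what fit_cubic_cell and its siblings are fitting: each observed peak position constrains the lattice parameters through Bragg's law and the symmetry-specific d-spacing relation. A standalone illustration for the cubic case (standard crystallography, not code from this project; the numbers are made up):

import numpy as np

def cubic_a_from_peak(twotheta_deg, h, k, l, wavelength):
    """Invert Bragg's law (lambda = 2 d sin(theta)) with the cubic relation
    d = a / sqrt(h^2 + k^2 + l^2) to recover a from a single peak."""
    d = wavelength / (2.0 * np.sin(np.radians(twotheta_deg) / 2.0))
    return d * np.sqrt(h ** 2 + k ** 2 + l ** 2)

# One (1 1 1) reflection at 2-theta = 8.3 degrees with a 0.3344 A beam
print(cubic_a_from_peak(8.3, 1, 1, 1, 0.3344))  # ~4.00 (angstroms)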
Example 18
    def train(self, train_data, dev_data):
        """Train function for the neural network

        This method trains the relation classification neural network. All parameters are contained in the configuration
        file. Multiple metrics are computed within this function, including a classification report (per class
        precision, recall, and f1), hits@3, hits@10, and mean reciprocal rank. If the command line parameter to log the
        metrics is passed, all metrics, including the loss for training and development sets, are logged using mlflow.

        Args:
            train_data: training data composed of (embeddings, labels, indices)
            dev_data: development data composed of (embeddings, labels, indices)
        """
        # Epoch level metrics
        epoch_loss = []
        epoch_train_metrics = []
        epoch_train_f1_metrics = []
        epoch_train_hits3 = []
        epoch_train_hits10 = []
        epoch_train_mrr = []
        epoch_dev_metrics = []
        epoch_dev_f1_metrics = []
        epoch_dev_report_metrics = []
        epoch_dev_hits3 = []
        epoch_dev_hits10 = []
        epoch_dev_mrr = []

        classes = list(range(281))
        batches = utils.split_into_batches(train_data,
                                           int(self.config["batch_size"]))

        step = 0  # step counter for mlflow logging

        # Train model for a number of epochs
        for epoch in range(int(self.config["epochs"])):
            start_time = time.time()

            # Batch level metrics
            train_loss = 0
            train_metrics = []
            train_f1_metrics = []
            train_hits3 = []
            train_hits10 = []
            train_mrr = []
            dev_loss = 0
            dev_metrics = []
            dev_f1_metrics = []
            dev_hits3 = []
            dev_hits10 = []
            dev_mrr = []

            # Run training in batches
            for i, batch in enumerate(batches):
                step += 1
                # Clear gradients accumulated from the previous batch
                self.optimizer.zero_grad()
                embs, rels, idxs = (
                    torch.tensor(batch[0]),
                    torch.tensor(batch[1], dtype=torch.long),
                    torch.tensor(batch[2]),
                )
                embs, rels = embs.to(self.device), rels.to(self.device)
                outputs = self.model(embs)
                loss = F.cross_entropy(outputs, rels)
                loss.backward()
                self.optimizer.step()
                train_loss += loss.item()

                # Compute train metrics
                num_corrects = (outputs.argmax(1) == rels).sum().item()
                train_acc = num_corrects / len(rels)
                train_metrics.append(train_acc)
                train_f1 = f1_score(
                    rels.tolist(),
                    outputs.argmax(1).tolist(),
                    average="macro",
                )
                train_f1_metrics.append(train_f1)

                train_ranks = self.get_ranks(outputs, rels.tolist())
                _train_hits3 = metrics.hits_at_k(train_ranks, k=3)
                _train_hits10 = metrics.hits_at_k(train_ranks, k=10)
                train_hits3.append(_train_hits3)
                train_hits10.append(_train_hits10)

                _train_mrr = metrics.mrr(train_ranks)
                train_mrr.append(_train_mrr)

                # Compute development metrics every 500 batches
                if i % 500 == 0:
                    with torch.no_grad():
                        dev_embs, dev_rels, dev_idxs = dev_data
                        dev_embs, dev_rels, dev_idxs = (
                            torch.tensor(dev_embs),
                            torch.tensor(dev_rels, dtype=torch.long),
                            torch.tensor(dev_idxs),
                        )
                        dev_embs, dev_rels = (
                            dev_embs.to(self.device),
                            dev_rels.to(self.device),
                        )
                        dev_outputs = self.model(dev_embs)
                        _loss = F.cross_entropy(dev_outputs, dev_rels)
                        dev_loss += _loss.item()
                        num_corrects = (
                            dev_outputs.argmax(1) == dev_rels).sum().item()
                        dev_acc = num_corrects / len(dev_rels)
                        dev_metrics.append(dev_acc)
                        dev_f1 = f1_score(
                            dev_rels.tolist(),
                            dev_outputs.argmax(1).tolist(),
                            average="macro",
                        )
                        dev_f1_metrics.append(dev_f1)
                        dev_report = metrics.get_classification_report(
                            dev_rels.tolist(),
                            dev_outputs.argmax(1).tolist(),
                        )
                        epoch_dev_report_metrics.append(dev_report)

                        dev_ranks = self.get_ranks(dev_outputs,
                                                   dev_rels.tolist())
                        _dev_hits3 = metrics.hits_at_k(dev_ranks, k=3)
                        _dev_hits10 = metrics.hits_at_k(dev_ranks, k=10)
                        dev_hits3.append(_dev_hits3)
                        dev_hits10.append(_dev_hits10)

                        _dev_mrr = metrics.mrr(dev_ranks)
                        dev_mrr.append(_dev_mrr)

                        # Log metrics
                        if self.params.log_metrics:
                            mlflow.log_metric("Train loss",
                                              train_loss / len(train_metrics),
                                              step=step)
                            mlflow.log_metric("Train acc",
                                              train_acc,
                                              step=step)
                            mlflow.log_metric("Train hitsat3",
                                              _train_hits3,
                                              step=step)
                            mlflow.log_metric("Train hitsat10",
                                              _train_hits10,
                                              step=step)
                            mlflow.log_metric("Train mrr",
                                              _train_mrr,
                                              step=step)
                            mlflow.log_metric("Dev loss",
                                              dev_loss / len(dev_metrics),
                                              step=step)
                            mlflow.log_metric("Dev acc", dev_acc, step=step)
                            mlflow.log_metric("Dev f1", dev_f1, step=step)
                            mlflow.log_metric("Dev hitsat3",
                                              _dev_hits3,
                                              step=step)
                            mlflow.log_metric("Dev hitsat10",
                                              _dev_hits10,
                                              step=step)
                            mlflow.log_metric("Dev mrr", _dev_mrr, step=step)

            secs = int(time.time() - start_time)
            mins = secs // 60
            secs = secs % 60

            self.scheduler.step()
            epoch_loss.append(train_loss / len(train_metrics))
            epoch_train_metrics.append(sum(train_metrics) / len(train_metrics))
            epoch_train_f1_metrics.append(
                sum(train_f1_metrics) / len(train_f1_metrics))
            epoch_train_hits3.append(sum(train_hits3) / len(train_hits3))
            epoch_train_hits10.append(sum(train_hits10) / len(train_hits10))
            epoch_train_mrr.append(sum(train_mrr) / len(train_mrr))
            epoch_dev_metrics.append(sum(dev_metrics) / len(dev_metrics))
            epoch_dev_f1_metrics.append(
                sum(dev_f1_metrics) / len(dev_f1_metrics))
            epoch_dev_hits3.append(sum(dev_hits3) / len(dev_hits3))
            epoch_dev_hits10.append(sum(dev_hits10) / len(dev_hits10))
            epoch_dev_mrr.append(sum(dev_mrr) / len(dev_mrr))

            # Print out results
            if self.params.print_output:
                print(
                    "Epoch: %d" % (epoch + 1),
                    " | time in %d minutes, %d seconds" % (mins, secs),
                )
                print(
                    f"\tEpoch avg Loss: {sum(epoch_loss)/len(epoch_loss):.4f} (train)"
                )
                print(
                    f"\tEpoch avg Acc: {sum(epoch_train_metrics)/len(epoch_train_metrics):.4f} (train)"
                )
                print(
                    f"\tEpoch avg F1: {sum(epoch_train_f1_metrics)/len(epoch_train_f1_metrics):.4f} (train)"
                )
                print(
                    f"\tEpoch avg hitsat3: {sum(epoch_train_hits3)/len(epoch_train_hits3):.4f} (train)"
                )
                print(
                    f"\tEpoch avg hitsat10: {sum(epoch_train_hits10)/len(epoch_train_hits10):.4f} (train)"
                )
                print(
                    f"\tEpoch avg mrr: {sum(epoch_train_mrr)/len(epoch_train_mrr):.4f} (train)"
                )

                print(
                    f"\tEpoch avg Acc: {sum(epoch_dev_metrics)/len(epoch_dev_metrics):.4f} (dev)"
                )
                print(
                    f"\tEpoch avg F1: {sum(epoch_dev_f1_metrics)/len(epoch_dev_f1_metrics):.4f} (dev)"
                )
                print(
                    f"\tEpoch avg hitsat3: {sum(epoch_dev_hits3)/len(epoch_dev_hits3):.4f} (dev)"
                )
                print(
                    f"\tEpoch avg hitsat10: {sum(epoch_dev_hits10)/len(epoch_dev_hits10):.4f} (dev)"
                )
                print(
                    f"\tEpoch avg mrr: {sum(epoch_dev_mrr)/len(epoch_dev_mrr):.4f} (dev)"
                )
                print(f"\tLast Acc: {epoch_dev_metrics[-1]:.4f} (dev)")
                print(f"\tLast F1: {epoch_dev_f1_metrics[-1]:.4f} (dev)")
                print(f"\tEpoch last hitsat3: {epoch_dev_hits3[-1]:.4f} (dev")
                print(
                    f"\tEpoch last hitsat10: {epoch_dev_hits10[-1]:.4f} (dev")
                print(f"\tEpoch last mrr: {epoch_dev_mrr[-1]:.4f} (dev")

            # Log metrics
            if self.params.log_metrics:
                mlflow.log_metric("Epoch Loss",
                                  sum(epoch_loss) / len(epoch_loss),
                                  step=epoch + 1)
                mlflow.log_metric(
                    "Epoch Avg Acc train",
                    sum(epoch_train_metrics) / len(epoch_train_metrics),
                    step=epoch + 1,
                )
                mlflow.log_metric(
                    "Epoch Avg F1 train",
                    sum(epoch_train_f1_metrics) / len(epoch_train_f1_metrics),
                    step=epoch + 1,
                )
                mlflow.log_metric("Epoch Hits at3 train",
                                  epoch_train_hits3[-1],
                                  step=epoch + 1)
                mlflow.log_metric("Epoch Hits at10 train",
                                  epoch_train_hits10[-1],
                                  step=epoch + 1)
                mlflow.log_metric("Epoch MRR train",
                                  epoch_train_mrr[-1],
                                  step=epoch + 1)
                mlflow.log_metric(
                    "Epoch Avg Acc dev",
                    sum(epoch_dev_metrics) / len(epoch_dev_metrics),
                    step=epoch + 1,
                )
                mlflow.log_metric(
                    "Epoch Avg F1 dev",
                    sum(epoch_dev_f1_metrics) / len(epoch_dev_f1_metrics),
                    step=epoch + 1,
                )
                mlflow.log_metric(
                    "Epoch Avg MRR dev",
                    sum(epoch_dev_mrr) / len(epoch_dev_mrr),
                    step=epoch + 1,
                )
                mlflow.log_metric("Epoch Acc dev",
                                  epoch_dev_metrics[-1],
                                  step=epoch + 1)
                mlflow.log_metric("Epoch F1 dev",
                                  epoch_dev_f1_metrics[-1],
                                  step=epoch + 1)
                mlflow.log_metric("Epoch Hits at3 dev",
                                  epoch_dev_hits3[-1],
                                  step=epoch + 1)
                mlflow.log_metric("Epoch Hits at10 dev",
                                  epoch_dev_hits10[-1],
                                  step=epoch + 1)
                mlflow.log_metric("Epoch MRR dev",
                                  epoch_dev_mrr[-1],
                                  step=epoch + 1)

        # print final report
        if self.params.print_output:
            print(epoch_dev_report_metrics[-1])

        # Save model
        if not os.path.isdir("saved_models"):
            os.mkdir("saved_models")
        if not self.model_path:
            self.model_path = "saved_models/" + utils.make_filename(
                self.config)
        else:
            # Append a timestamp so an existing checkpoint is not overwritten.
            self.model_path = (self.model_path.split(".pt")[0] + "_" +
                               time.strftime("%Y%m%d-%H%M%S") + ".pt")
        self.save_model()

        # Save classification report
        report_path = self.model_path.split(".pt")[0] + ".dev.report.tsv"
        report = metrics.get_classification_report(
            dev_rels.tolist(),
            dev_outputs.argmax(1).tolist(),
            output_dict=True)
        report_df = pd.DataFrame(report).transpose()
        report_df.to_csv(report_path, sep="\t", index=False)
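
For context, `utils.make_filename(self.config)` above derives the checkpoint name from the run configuration. A minimal sketch of such a helper, assuming a plain dict config whose keys ("model", "lr") are illustrative and not taken from this project:

import time

def make_filename(config):
    # Sketch only: the keys "model" and "lr" are illustrative assumptions.
    parts = [str(config.get("model", "model")),
             "lr%g" % config.get("lr", 0.0),
             time.strftime("%Y%m%d-%H%M%S")]
    return "_".join(parts) + ".pt"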
Example No. 20
0
 def make_temp_filenames(self, temp_dir=None):
     bgsub_filen = make_filename(self.fname, 'bgsub.chi', temp_dir=temp_dir)
     bg_filen = make_filename(self.fname, 'bg.chi', temp_dir=temp_dir)
     return bgsub_filen, bg_filen
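
This variant only swaps the file extension and optionally redirects the result into a temp directory. A minimal sketch consistent with these call sites, not the project's actual implementation:

import os

def make_filename(fname, ext, temp_dir=None):
    # Sketch only: replace the extension of `fname` with `ext`,
    # optionally placing the result inside `temp_dir`.
    stem = os.path.splitext(os.path.basename(fname))[0]
    out = stem + "." + ext
    if temp_dir is not None:
        return os.path.join(temp_dir, out)
    return os.path.join(os.path.dirname(fname), out)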
Example No. 21
0
 def test_make_filename_init(self, fname):
     res = utils.make_filename(fname, init_fname="init")
     assert res == "parent\\init.ext"
Example No. 22
0
 def test_make_filename(self, fname):
     res = utils.make_filename(fname, ".new_ext", parent="new_parent")
     assert res == "new_parent\\file.new_ext"
Example No. 23
0
from train import model_fn_builder, FLAGS
from models.rnn_lstm import create_rnn_lstm_model, LSTMConfig
from models.cnn import CNNConfig, create_cnn_model
from models.cnn_keras import CNNKerasConfig, create_cnnKeras_model
from models.contextualized_cnn import create_contextualized_cnn_model, ContextualizedCNNConfig
from models.fully_connected import create_fully_connected_model, FullyConnectedConfig

DATA_BERT_DIRECTORY = FLAGS.data_bert_directory
BERT_CONFIG_FILE = "%s/bert_config.json" % DATA_BERT_DIRECTORY
bert_config = BertConfig.from_json_file(BERT_CONFIG_FILE)

INIT_CHECKPOINT = FLAGS.output_dir
if FLAGS.init_checkpoint is not None:
    INIT_CHECKPOINT = '%s/%s' % (FLAGS.output_dir, FLAGS.init_checkpoint)

DEV_FILENAME = make_filename('dev', 1., FLAGS.features_dir, FLAGS.fine_tune,
                             FLAGS.n_examples)
print('DEV_FILENAME %s' % DEV_FILENAME)

RawResult = collections.namedtuple("RawResult",
                                   ["unique_id", "start_logits", "end_logits"])


def load_and_save_config(filename):
    with tf.gfile.GFile(filename, 'r') as json_data:
        parsed = json.load(json_data)
        parsed['max_seq_length'] = FLAGS.max_seq_length
        parsed['bert_config'] = bert_config

        create_model = None
        config_class = None
        if parsed['model'] == 'lstm':
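            # (Excerpt truncated here. A hedged reconstruction of the rest of
            # the dispatch, based on the imports above; the exact key strings
            # are assumptions.)
            create_model = create_rnn_lstm_model
            config_class = LSTMConfig
        elif parsed['model'] == 'cnn':
            create_model = create_cnn_model
            config_class = CNNConfig
        elif parsed['model'] == 'cnn_keras':
            create_model = create_cnnKeras_model
            config_class = CNNKerasConfig
        elif parsed['model'] == 'contextualized_cnn':
            create_model = create_contextualized_cnn_model
            config_class = ContextualizedCNNConfig
        elif parsed['model'] == 'fully_connected':
            create_model = create_fully_connected_model
            config_class = FullyConnectedConfig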
Example No. 24
0
tf.flags.DEFINE_bool("do_lower_case", True, "Bert embeddings are lower cased.")

DATA_BERT_DIRECTORY = FLAGS.data_bert_directory
BERT_CONFIG_FILE = "%s/bert_config.json" % DATA_BERT_DIRECTORY

OUTPUT_DIR = FLAGS.output_dir + "/" + datetime.now().isoformat()

INIT_CHECKPOINT = None
if FLAGS.fine_tune:
    INIT_CHECKPOINT = '%s/bert_model.ckpt' % DATA_BERT_DIRECTORY
elif FLAGS.init_checkpoint is not None:
    INIT_CHECKPOINT = '%s/%s' % (OUTPUT_DIR, FLAGS.init_checkpoint)

N_TRAIN_EXAMPLES = FLAGS.n_examples
TRAIN_FILE_NAME = make_filename('train', (1.0 - FLAGS.eval_percent),
                                FLAGS.features_dir, FLAGS.fine_tune,
                                N_TRAIN_EXAMPLES)
EVAL_FILE_NAME = make_filename('eval', (FLAGS.eval_percent),
                               FLAGS.features_dir, FLAGS.fine_tune,
                               N_TRAIN_EXAMPLES)

bert_config = BertConfig.from_json_file(BERT_CONFIG_FILE)

N_TOTAL_SQUAD_EXAMPLES = 130319


def load_and_save_config(filename):
    with tf.gfile.GFile(filename, 'r') as json_data:
        parsed = json.load(json_data)
        parsed['max_seq_length'] = FLAGS.max_seq_length
        parsed['bert_config'] = bert_config.to_dict()
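
Both this example and the previous one build TFRecord paths with make_filename(split, fraction, features_dir, fine_tune, n_examples). A minimal sketch of one plausible naming scheme; the format itself is an assumption, not taken from these excerpts:

def make_filename(split, percent, features_dir, fine_tune, n_examples):
    # Sketch under assumptions: the real naming scheme is not shown here.
    tag = 'fine_tune' if fine_tune else 'frozen'
    return '%s/%s_%s_%.2f_%d.tf_record' % (
        features_dir, split, tag, percent, n_examples)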
Example No. 25
0
def get_furlongs_data(route_name=None, weather_category=None,
                      shift_yards_same_elr=220, shift_yards_diff_elr=220,
                      update=False, verbose=False) -> pd.DataFrame:
    """
    Get furlongs data.

    :param route_name: name of a Route; if ``None`` (default), all Routes
    :type route_name: str or None
    :param weather_category: weather category, defaults to ``None``
    :type weather_category: str or None
    :param shift_yards_same_elr: yards by which the start/end mileage is shifted for adjustment,
        given that StartELR == EndELR, defaults to ``220``
    :type shift_yards_same_elr: int or float
    :param shift_yards_diff_elr: yards by which the start/end mileage is shifted for adjustment,
        given that StartELR != EndELR, defaults to ``220``
    :type shift_yards_diff_elr: int or float
    :param update: whether to check on update and proceed to update the package data,
        defaults to ``False``
    :type update: bool
    :param verbose: whether to print relevant information in console as the function runs,
        defaults to ``False``
    :type verbose: bool or int
    :return: data of furlongs for incident locations
    :rtype: pandas.DataFrame

    **Test**::

        from models.prototype.furlong import get_furlongs_data

        weather_category     = None
        shift_yards_same_elr = 220
        shift_yards_diff_elr = 220
        update               = True
        verbose              = True

        route_name = None
        furlongs_data = get_furlongs_data(route_name, weather_category, shift_yards_same_elr,
                                          shift_yards_diff_elr, update, verbose)
        print(furlongs_data)

        route_name = 'Anglia'
        furlongs_data = get_furlongs_data(route_name, weather_category, shift_yards_same_elr,
                                          shift_yards_diff_elr, update, verbose)
        print(furlongs_data)
    """

    filename = "furlongs"
    pickle_filename = make_filename(
        filename, route_name, weather_category, shift_yards_same_elr, shift_yards_diff_elr)
    path_to_pickle = cdd_geodata(pickle_filename)

    if os.path.isfile(path_to_pickle) and not update:
        furlongs_data = load_pickle(path_to_pickle)

    else:
        try:
            # Data of incident furlongs: both start and end identified by the same ELR
            furlongs_data_same_elr = get_furlongs_same_start_end_elrs(
                route_name=route_name, weather_category=weather_category,
                shift_yards_same_elr=shift_yards_same_elr, verbose=verbose)

            # Data of incident furlongs: start and end are identified by different ELRs
            furlongs_data_diff_elr = get_furlongs_diff_start_end_elrs(
                route_name=route_name, weather_category=weather_category,
                shift_yards_diff_elr=shift_yards_diff_elr, verbose=verbose)

            # Merge the two datasets (DataFrame.append was removed in pandas 2.x)
            furlongs_data = pd.concat(
                [furlongs_data_same_elr, furlongs_data_diff_elr])
            furlongs_data.drop_duplicates(['AssetNumber', 'StructuredPlantNumber'], inplace=True)
            furlongs_data.sort_index(inplace=True)

            save_pickle(furlongs_data, path_to_pickle, verbose=verbose)

        except Exception as e:
            print("Failed to get \"{}\". {}.".format(os.path.splitext(pickle_filename)[0], e))
            furlongs_data = None

    return furlongs_data
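
Here make_filename appears to fold the route name, weather category and the two shift parameters into the pickle name, so each parameter combination gets its own cache file. A toy sketch of that pattern; the separator and extension are assumptions:

def make_filename(base, *components):
    # Illustrative only: join the non-None components into a cache filename.
    parts = [base] + [str(c) for c in components if c is not None]
    return "-".join(parts) + ".pickle"

# e.g. make_filename("furlongs", "Anglia", None, 220, 220)
# -> "furlongs-Anglia-220-220.pickle"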