Example #1
    def status(self, interval=1., timeout=-1, fo=sys.stdout):
        """Waits for the jobs, printing progress at regular intervals

        Args:
            interval (float): a time in seconds, after which to print the progress.
            timeout (float): a time in seconds, after which to give up waiting.
            fo (file): a file object to which the progress is printed.

        """
        if self.ar is None:
            return
        if timeout is None:
            timeout = -1

        # Make sure to write the job results into the job objects.
        self.wait(1e-3)

        tic = time.time()
        while not self.ar.ready() and (timeout < 0 or time.time() - tic <= timeout):
            self.wait(interval)
            clear_output(wait=True)
            dt = datetime.timedelta(seconds=self.ar.elapsed)
            fo.write('{}/{} tasks finished after {}'.format(self.ar.progress, len(self.ar), str(dt)))
            fo.flush()
        else:
            fo.write('\n')
        dt = datetime.timedelta(seconds=self.ar.elapsed)
        clear_output(wait=True)
        fo.write('{} tasks completed in {}\n'.format(len(self.ar), str(dt)))
        fo.flush()
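The same polling pattern is easy to reuse outside the class. A minimal standalone sketch, assuming an ipyparallel AsyncResult `ar` (for example from `view.map_async`) and a notebook frontend:

import sys
import datetime
from IPython.display import clear_output

def watch(ar, interval=1.0, fo=sys.stdout):
    # Redraw a single status line until every task has finished.
    while not ar.ready():
        ar.wait(interval)
        clear_output(wait=True)
        dt = datetime.timedelta(seconds=ar.elapsed)
        fo.write('{}/{} tasks finished after {}\n'.format(ar.progress, len(ar), dt))
        fo.flush()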
Example #2
    def __call__(self, percentage):
        " Update the progress bar within the specified percent_range"
        if self.start_time is None: self.start_time = time.time()
        span = (self.percent_range[1]-self.percent_range[0])
        percentage = self.percent_range[0] + ((percentage/100.0) * span)

        if self.display == 'disabled': return
        elif self.display == 'stdout':
            if percentage == 100 and self.elapsed_time:
                elapsed = time.time() - self.start_time
                if clear_output and not ipython2: clear_output()
                if clear_output and ipython2: clear_output(wait=True)
                sys.stdout.write('\r' + '100%% %s %02d:%02d:%02d'
                                 % (self.label.lower(), elapsed//3600,
                                    (elapsed//60) % 60, elapsed % 60))
                return
            else:
                self._stdout_display(percentage)
            return

        if 'socket' not in self.cache:
            self.cache['socket'] = self._get_socket()

        if self.cache['socket'] is not None:
            self.cache['socket'].send('%s|%s' % (percentage, self.label))
Example #3
 def df(self, value):
     """This allows for on-the-fly data updating, so you only need
     to create one BarPlot Dashboard and instantiate the widgets once"""
     self._update_columns(list(value.columns.values))
     self._df = value
     self.update()
     clear_output()
Example #4
def print_update(message):
    try:
        clear_output()
    except Exception:
        pass
    print(message)
    sys.stdout.flush()
Example #5
def CreateWeightHist(net, selectedLayers):
    firstLayer = int(selectedLayers.split("->")[0])
    weights = net["layers"][firstLayer]["Weights"]
    n1 = int(weights["row"])
    n2 = int(weights["cols"])
    m = ROOT.TMatrixD(n1, n2+1)
    vec = weights["data"]
    for i in range(n1):
        for j in range(n2):
            m[i][j] = vec[j+n2*i]
    bvec = net["layers"][firstLayer]["Biases"]["data"]
    if n1 != len(bvec):
        print("Something is wrong: the number of bias weights does not match the number of neurons ("+str(n1)+"!="+str(len(bvec))+")")
        return
    for i in range(n1):
        m[i][n2] = bvec[i]
    th2 = ROOT.TH2D(m)
    th2.SetTitle("Weight map for DNN")
    for i in range(n2):
        th2.GetXaxis().SetBinLabel(i + 1, str(i))
    th2.GetXaxis().SetBinLabel(n2+1, "B")
    for i in range(n1):
        th2.GetYaxis().SetBinLabel(i + 1, str(i))
    th2.GetXaxis().SetTitle("Layer: "+str(firstLayer))
    th2.GetYaxis().SetTitle("Layer: "+str(firstLayer+1))
    th2.SetStats(0)
    th2.SetMarkerSize(1.5)
    th2.SetMarkerColor(0)
    labelSize = 0.040
    th2.GetXaxis().SetLabelSize(labelSize)
    th2.GetYaxis().SetLabelSize(labelSize)
    th2.LabelsOption("d")
    th2.SetLabelOffset(0.011)
    clear_output()
    JPyInterface.JsDraw.Draw(th2, 'drawDNNMap')
Example #6
def print_update(message):
    "Print a message immediately; do not wait for current execution to finish."
    try:
        clear_output()
    except Exception:
        pass
    print(message)
    sys.stdout.flush()
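A hypothetical usage sketch for print_update(): called in a loop, it overwrites the cell output on each iteration instead of appending, which keeps long-running loops readable (assumes print_update is defined as above, with clear_output imported from IPython.display):

import time

for i in range(10):
    print_update('processed item {} of 10'.format(i + 1))  # replaces the previous line
    time.sleep(0.5)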
Example #7
 def flush_ipython(self):
     try:
         clear_output()
     except Exception:
         # terminal IPython has no clear_output
         pass
     print('\r', end='')
     sys.stdout.flush()
Example #8
def BookDNN(self, loader, title="DNN"):
    global __BookDNNHelper
    def __bookDNN(optString):
        self.BookMethod(loader, ROOT.TMVA.Types.kDNN, title, optString)
        return
    __BookDNNHelper = __bookDNN
    clear_output()
    JPyInterface.JsDraw.InsertCSS("NetworkDesigner.min.css")
    JPyInterface.JsDraw.Draw("", "NetworkDesigner", True)
Example #9
 def animate_ipython(self, iter):
     try:
         clear_output()
     except Exception:
         # terminal IPython has no clear_output
         pass
     print('\r', self, end='')
     sys.stdout.flush()
     self.update_iteration(iter + 1)
Example #10
        def refresh_ptcl(change=None, force=False):
            """
            Refresh the current particle figure

            Parameters :
            ------------
            change: dictionary
                Dictionary passed by the widget to a callback function
                whenever a widget changes
                (see docstring of ipywidgets.Widget.observe)
                This is mainly a placeholder; not used in this function

            force: bool
                Whether to force the update
            """
            # Determine whether to do the refresh
            do_refresh = False
            if self.avail_species is not None:
                if force or ptcl_refresh_toggle.value:
                    do_refresh = True
            # Do the refresh
            if do_refresh:
                plt.figure(ptcl_figure_button.value, figsize=figsize)
                plt.clf()

                # When working in inline mode, in an ipython notebook,
                # clear the output (prevents the images from stacking
                # in the notebook)
                if 'inline' in matplotlib.get_backend():
                    clear_output()

                if ptcl_use_button.value:
                    i_power = ptcl_magnitude_button.value
                    vmin = ptcl_range_button.value[0] * 10 ** i_power
                    vmax = ptcl_range_button.value[1] * 10 ** i_power
                else:
                    vmin = None
                    vmax = None

                if ptcl_yaxis_button.value == 'None':
                    # 1D histogram
                    self.get_particle(t=self.current_t, output=False,
                        var_list=[ptcl_xaxis_button.value],
                        select=ptcl_select_widget.to_dict(),
                        species=ptcl_species_button.value, plot=True,
                        vmin=vmin, vmax=vmax, cmap=ptcl_color_button.value,
                        nbins=ptcl_bins_button.value)
                else:
                    # 2D histogram
                    self.get_particle(t=self.current_t, output=False,
                        var_list=[ptcl_xaxis_button.value,
                                  ptcl_yaxis_button.value],
                        select=ptcl_select_widget.to_dict(),
                        species=ptcl_species_button.value, plot=True,
                        vmin=vmin, vmax=vmax, cmap=ptcl_color_button.value,
                        nbins=ptcl_bins_button.value)
Example #11
    def display_ipython(self):
        try:
            clear_output()
        except Exception:
            # terminal IPython has no clear_output
            pass

        print('\r', self, end="")
        sys.stdout.flush()
        time.sleep(0.001)
Example #12
 def progressBar(cell, starting, current_step, stopping):
     progressing = float(current_step - starting) / (stopping - starting)
     from IPython.core.display import clear_output
     clear_output(wait=True)
     import time
     current_time = time.time()
     print('[{:6.6f} seconds {:} {:}% {:}]'.format(current_time - cell.start_time
                                           , int(10 * progressing) * '--'
                                           , int(100 * progressing)
                                           , int(10 - 10 * progressing) * '++'))
Example #13
 def clicked(b):
     if treeSelector.value>tr.getNTrees():
         treeSelector.value = tr.getNTrees()
     clear_output()
     toJs = {
         "variables": variables,
         "tree": tr.getTree(treeSelector.value)
     }
     json_str = json.dumps(toJs)
     JPyInterface.JsDraw.Draw(json_str, "drawDecisionTree", True)
Example #14
    def animate_ipython(self, *args, **kwargs):
        self.update(*args, **kwargs)

        try:
            clear_output()
        except Exception:
            # terminal IPython has no clear_output
            pass
        print('\r {}'.format(self.progressbar), end='')
        sys.stdout.flush()
Example #15
    def print_termlogs(self):

        termlogs = self.termlogs()
        if self.has_ipython and not self.has_curses:
            clear_output(wait=True)
        else:
            for i in range(len(self.data) + 1):
                termlogs += "\033[A"

        sys.stdout.write(termlogs)
        sys.stdout.flush()
Example #16
 def wait_interactive(self, interval=1., timeout=None):
     """interactive wait, printing progress at regular intervals"""
     N = len(self)
     tic = time.time()
     while not self.ready() and (timeout is None or time.time() - tic <= timeout):
         self.wait(interval)
         clear_output()
         print("%4i/%i tasks finished after %4i s" % (self.progress, N, self.elapsed), end="")
         sys.stdout.flush()
     print()
     print("done")
Example #17
 def update(self, percentage):
     " Update the progress bar to the given percentage value "
     if clear_output: clear_output()
     percent_per_char = 100.0 / self.width
     char_count = int(math.floor(percentage/percent_per_char) if percentage<100.0 else self.width)
     blank_count = self.width - char_count
     print('\r', "[%s%s] %0.1f%%" % (self.fill_char * char_count,
                           ' '*len(self.fill_char)*blank_count,
                           percentage))
     sys.stdout.flush()
     time.sleep(0.0001)
Example #18
def wait_for_kill(asyncres, view, updateInterval = 1.):
    from IPython.core.display import clear_output
    N = len(asyncres)
    print("%4i/%i tasks finished after %4i s" % (asyncres.progress, N, asyncres.elapsed), end="")
    while not asyncres.ready():
        asyncres.wait(updateInterval)
        clear_output()
        print("%4i/%i tasks finished after %4i s" % (asyncres.progress, N, asyncres.elapsed), end="")
        sys.stdout.flush()
    print("")
    view.shutdown()
    print("done")
Example #19
 def wait_interactive(self, interval=1.0, timeout=-1):
     """interactive wait, printing progress at regular intervals"""
     if timeout is None:
         timeout = -1
     N = len(self)
     tic = time.time()
     while not self.ready() and (timeout < 0 or time.time() - tic <= timeout):
         self.wait(interval)
         clear_output(wait=True)
         print("%4i/%i tasks finished after %4i s" % (self.progress, N, self.elapsed), end="")
         sys.stdout.flush()
     print("\ndone")
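For context, a hedged usage sketch: ipyparallel exposes this method on its AsyncResult objects, so a notebook cell can monitor a parallel map like this (assumes a running ipyparallel cluster):

import ipyparallel as ipp

rc = ipp.Client()                    # connect to the running cluster
view = rc.load_balanced_view()
ar = view.map_async(lambda x: x ** 2, range(100))
ar.wait_interactive()                # redraws the progress line, then prints "done"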
Example #20
 def _stdout_display(self, percentage):
     if clear_output and not ipython2: clear_output()
     if clear_output and ipython2: clear_output(wait=True)
     percent_per_char = 100.0 / self.width
     char_count = int(math.floor(percentage/percent_per_char)
                      if percentage<100.0 else self.width)
     blank_count = self.width - char_count
     sys.stdout.write('\r' + "%s[%s%s] %0.1f%%"
                      % (self.label+':\n' if self.label else '',
                         self.fill_char * char_count,
                         ' '*len(self.fill_char) * blank_count,
                         percentage))
     sys.stdout.flush()
     time.sleep(0.0001)
Example #21
def wazzup_slaves(direct_view, n_lines=3):
    while True:
        from IPython.core.display import clear_output
        clear_output()

        wazzups = direct_view.apply(wazzup, n_lines).get()

        for hostname, wazzup_val in zip(get_hostnames(direct_view).items(), wazzups):
            print("\x1b[31m" + str(hostname) + "\x1b[0m")
            print("".join(wazzup_val))
            print("")
            sys.stdout.flush()

        time.sleep(1)
Example #22
    def render(self, chart_type, data):
        if chart_type == 'linechart':
            chart = pygal.XY(height=self.height,
                             width=self.width,
                             x_label_rotation=45)

            chart.title = self.title

            for series_name in data.keys():
                series = data[series_name]
                chart.add(series_name, series)

        elif chart_type == 'timechart':
            chart = pygal.DateTimeLine(height=self.height,
                                       width=self.width,
                                       x_label_rotation=45,
                                       x_value_formatter=moment.datetime_to_iso8601)

            chart.title = self.title

            for series_name in data.keys():
                series = data[series_name]
                chart.add(series_name, series)

        elif chart_type == 'barchart':
            chart = pygal.Bar(height=self.height, width=self.width)
            chart.title = self.title

            chart.x_labels = [category for (category, _) in data[list(data.keys())[0]]]

            for series_name in data.keys():
                series = [value for (_, value) in data[series_name]]
                chart.add(series_name, series)

        else:
            raise FlumeException('unsupported chart type "%s" for pygal view'
                                   % chart_type)

        if is_ipython():
            from IPython.core.display import display, HTML, clear_output
            clear_output(wait=True)
            display(HTML(chart.render()))

        elif self.filename.endswith('.png'):
            chart.render_to_png(self.filename)

        elif self.filename.endswith('.html'):
            with open(self.filename, 'w') as output:
                output.write(HTML_TEMPLATE % chart.render())
Example #23
    def print_termlogs(self, training_state):

        termlogs = self.termlogs(
            step=training_state.step,
            global_loss=training_state.global_loss,
            global_acc=training_state.global_acc)

        if self.has_ipython and not self.has_curses:
            clear_output(wait=True)
        else:
            for i in range(len(self.data) + 1):
                termlogs += "\033[A"

        sys.stdout.write(termlogs)
        sys.stdout.flush()
Example #24
 def update_ipython(self, value):
     "Updates the progress bar to a new value."
     assert 0 <= value <= self.maxval
     self.currval = value
     if not self._need_update() or self.finished:
         return
     if not self.start_time:
         self.start_time = time.time()
     self.seconds_elapsed = time.time() - self.start_time
     self.prev_percentage = self.percentage()
     if self.clear_output is True:
         clear_output()
     print("\r", self._format_line(), end="")
     sys.stdout.flush()
     if value == self.maxval:
         self.finished = True
Example #25
    def print_termlogs(self, training_state):

        termlogs = self.termlogs(
            step=training_state.step,
            global_loss=training_state.global_loss,
            global_acc=training_state.global_acc,
            step_time=training_state.step_time_total)

        if self.has_ipython and not CURSES_SUPPORTED:
            clear_output(wait=True)
        else:
            for i in range(len(self.data) + 1):
                termlogs += "\033[A"

        sys.stdout.write(termlogs)
        sys.stdout.flush()
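The "\033[A" appended once per line of the status block is the ANSI "cursor up" escape: when clear_output is unavailable, the cursor is walked back up over the block just written so the next write overdraws it in place. A minimal standalone sketch of that fallback, assuming an ANSI-capable terminal and redraws at least as wide as the previous ones:

import sys
import time

BLOCK_HEIGHT = 2  # number of lines redrawn each step
for step in range(1, 6):
    sys.stdout.write("step: {}\nloss: {:.3f}\n".format(step, 1.0 / step))
    sys.stdout.flush()
    time.sleep(0.2)
    if step < 5:
        sys.stdout.write("\033[A" * BLOCK_HEIGHT)  # move back up over the block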
Example #26
 def _stdout_display(self, percentage, display=True):
     if clear_output and not ipython2: clear_output()
     if clear_output and ipython2: clear_output(wait=True)
     percent_per_char = 100.0 / self.width
     char_count = int(math.floor(percentage/percent_per_char)
                      if percentage<100.0 else self.width)
     blank_count = self.width - char_count
     prefix = '\n' if len(self.current_progress) > 1 else ''
     self.out =  prefix + ("%s[%s%s] %0.1f%%" %
                           (self.label+':\n' if self.label else '',
                            self.fill_char * char_count,
                            ' '*len(self.fill_char) * blank_count,
                            percentage))
     if display:
         sys.stdout.write(''.join([pg.out for pg in self.current_progress]))
         sys.stdout.flush()
         time.sleep(0.0001)
Example #27
  def update_display(self, force=False):
    """Updates display on the frontend.

    Retrieves the latest execution status by querying CacheManager and updates
    the display on the frontend. The assumption is that there is only one
    pipeline in a cell, because it clears up everything in the cell output
    every update cycle.

    Args:
      force: (bool) whether to force updating when no stats change happens.
    """
    with self._lock:
      stats_updated = False

      for pcoll_id, stats in self._pcollection_stats.items():
        cache_label = stats['cache_label']
        version = stats['version']

        if force or not self._cache_manager.is_latest_version(
            version, 'sample', cache_label):
          pcoll_list, version = self._cache_manager.read('sample', cache_label)
          stats['sample'] = pcoll_list
          stats['version'] = version
          stats_updated = True

          if pcoll_id in self._analyzer.tl_referenced_pcoll_ids():
            self._text_to_print[pcoll_id] = (str(
                '%s produced %s' % (
                    self._producers[pcoll_id],
                    interactive_pipeline_graph.format_sample(pcoll_list, 5))))

      if force or stats_updated:
        self._pipeline_graph.update_pcollection_stats(self._pcollection_stats)

        if IPython:
          from IPython.core import display
          display.clear_output(True)
          rendered_graph = self._renderer.render_pipeline_graph(
              self._pipeline_graph)
          display.display(display.HTML(rendered_graph))

        _display_progress('Running...')
        for text in self._text_to_print.values():
          if text != "":
            _display_progress(text)
Example #28
        def refresh_field(change=None, force=False):
            """
            Refresh the current field figure

            Parameters :
            ------------
            change: dictionary
                Dictionary passed by the widget to a callback function
                whenever a widget changes
                (see docstring of ipywidgets.Widget.observe)
                This is mainly a placeholder; not used in this function

            force: bool
                Whether to force the update
            """
            # Determine whether to do the refresh
            do_refresh = False
            if (self.avail_fields is not None):
                if force or fld_refresh_toggle.value:
                    do_refresh = True
            # Do the refresh
            if do_refresh:
                plt.figure(fld_figure_button.value, figsize=figsize)
                plt.clf()

                # When working in inline mode, in an ipython notebook,
                # clear the output (prevents the images from stacking
                # in the notebook)
                if 'inline' in matplotlib.get_backend():
                    clear_output()

                if fld_use_button.value:
                    i_power = fld_magnitude_button.value
                    vmin = fld_range_button.value[0] * 10 ** i_power
                    vmax = fld_range_button.value[1] * 10 ** i_power
                else:
                    vmin = None
                    vmax = None

                self.get_field(t=self.current_t, output=False, plot=True,
                    field=fieldtype_button.value, coord=coord_button.value,
                    m=convert_to_int(mode_button.value),
                    slicing=slicing_button.value, theta=theta_button.value,
                    slicing_dir=slicing_dir_button.value,
                    vmin=vmin, vmax=vmax, cmap=fld_color_button.value)
Example #29
    def notify_complete(self, del_time, return_name, return_shape):
        """Execute JavaScript code that shows when a query is complete."""
        pretty_time = time.strftime('%I:%M:%S %p %Z')
        cell_id = int(time.time())
        # fixes issue where browser pops up on reload
        # without this, javascript is executed every
        # time notebook is loaded
        cur_time = (1+time.time())*1000.
        string_args = {
            'pretty_time': pretty_time,
            'del_time': del_time,
            'cell_id': cell_id,
            'return_name': return_name,
            'return_shape': return_shape,
            'cur_time': cur_time
        }
        add_cell_id = '<a id="{cell_id}"></a>\n'.format(cell_id=cell_id)
        alert_str = '''
        <script>
        function notifyMe() {{
          if (Notification.permission !== "granted")
            Notification.requestPermission();
          else {{
            var notification = new Notification('Query Finished in {del_time:2.2f} m', {{
              icon: 'https://raw.githubusercontent.com/crawles/Logos/master/jupyter.png?raw=true',
              body: "Name: {return_name}\\nDimensions: {return_shape}",
            }});

            notification.onclick = function () {{
              document.getElementById('{cell_id}').scrollIntoView();
            }};

          }}
        }}
        var isIE = /*@cc_on!@*/false || !!document.documentMode;
        // prevents notifications from popping up when notebook is re-opened
        if (Date.now() < {cur_time} && !isIE) {{
        notifyMe(); }};
        </script>
        '''.format(**string_args)
        html_str = add_cell_id + alert_str
        self.shell.displayhook(HTML(html_str))
        # clear output because displayhook creates an [Out] cell, cluttering the notebook
        clear_output()
Example #30
def CreateWeightHist(net, selectedLayers):
    weightStartIndex = 0
    numberOfWeights = 0
    firstLayer = int(selectedLayers.split("->")[0])
    for i in range(firstLayer):
        n1 = int(net["layers"][i-1]["Nodes"])
        n2 = int(net["layers"][i]["Nodes"])
        weightStartIndex += int(n1*n2)
    n1 = 1
    n2 = 1
    if firstLayer>0:
        n1 = int(net["layers"][firstLayer-1]["Nodes"])
        n2 = int(net["layers"][firstLayer]["Nodes"])
    else:
        n2 = int(net["layers"][firstLayer]["Nodes"])
    numberOfWeights = n1*n2
    m = ROOT.TMatrixD(n1, n2)
    for i in range(n1):
        for j in range(n2):
            idx = j+n2*i
            if idx>numberOfWeights:
                print("Something is wrong...")
                continue
            m[i][j] = float(net["synapses"]["synapses"][weightStartIndex+idx])
    th2 = ROOT.TH2D(m)
    th2.SetTitle("Weight map for DNN")
    for i in range(n2):
        th2.GetXaxis().SetBinLabel(i + 1, str(i))
    for i in range(n1):
        th2.GetYaxis().SetBinLabel(i + 1, str(i))
    th2.GetXaxis().SetTitle("Layer: "+str(firstLayer+1))
    th2.GetYaxis().SetTitle("Layer: "+str(firstLayer))
    th2.SetStats(0)
    th2.SetMarkerSize(1.5)
    th2.SetMarkerColor(0)
    labelSize = 0.040
    th2.GetXaxis().SetLabelSize(labelSize)
    th2.GetYaxis().SetLabelSize(labelSize)
    th2.LabelsOption("d")
    th2.SetLabelOffset(0.011)
    clear_output()
    JPyInterface.JsDraw.Draw(th2, 'drawDNNMap')
Example #31
 def __clear(self):
     try:
         clear_output()
     except Exception:
         # terminal IPython has no clear_output
         pass
Example #32
    def create_features(self,
                        low_memory=True,
                        broke_age=True,
                        save_files=None,
                        skip_number=0,
                        stop_number=0):
        '''The final function that runs all the others. It loads the datasets and processes them
        into train and test DataFrames. low_memory is intended for PCs with little RAM:
        the purchases dataset is then loaded incrementally.
        broke_age enables a simple model that predicts a client's age and corrects clearly erroneous values.
        skip_number is the number of chunks to skip; useful when part of the file has already been processed.
        stop_number is the chunk number at which to stop loading the file.
        This allows the file to be processed in several passes on machines with little RAM.
        '''
        self.low_memory = low_memory
        start_time = time.time()
        df_clients = pd.read_csv(self.clients_path)
        train = pd.read_csv(self.train_path)
        test = pd.read_csv(self.test_path)
        df_features = self.features_processing_from_clients_df(df_clients)
        if low_memory:
            purchases_chunks = pd.read_csv(self.purchases_path,
                                           chunksize=10**6)
            features_set = pd.DataFrame()
            data = []
            count = 0

            for chunk in purchases_chunks:
                count += 1
                if count <= skip_number:
                    clear_output()
                    print(f'Skipped chunk {count} of {skip_number} chunks')
                else:
                    print(f'Processing chunk {count} of {46} chunks')
                    chunk.fillna(0, inplace=True)
                    chunk['hour'] = pd.to_datetime(
                        chunk['transaction_datetime']).dt.hour
                    chunk['dayofweek'] = pd.to_datetime(
                        chunk['transaction_datetime']).dt.dayofweek
                    data += self.features_processing_from_purchases(chunk)
                    if count == stop_number:
                        break

            print('Building the combined dataframe...')
            features_set = pd.DataFrame(data)
        else:
            #print('Building the combined dataframe...')
            purchases = pd.read_csv(self.purchases_path)
            purchases['hour'] = pd.to_datetime(
                purchases['transaction_datetime']).dt.hour
            purchases['dayofweek'] = pd.to_datetime(
                purchases['transaction_datetime']).dt.dayofweek
            features_set = pd.DataFrame(
                self.features_processing_from_purchases(purchases))

        features_set = pd.merge(features_set, df_features, how='inner')
        if broke_age:
            features_set = self.predict_broke_age(features_set)
        print('Building the separate dataframes...')
        features_set_train = pd.merge(features_set, train, how='inner')
        features_set_test = pd.merge(features_set, test, how='inner')

        if save_files:

            if stop_number != 0:
                new_path = self.path + 'feat_sets/'
                print(f'Saving data to directory {new_path}...')
                features_set_train.to_csv(new_path + 'features_set_train_to_' +
                                          str(stop_number) + '.csv',
                                          index=False)
                features_set_test.to_csv(new_path + 'features_set_test_to_' +
                                         str(stop_number) + '.csv',
                                         index=False)
                print(f'Files successfully saved to directory {new_path}')
            else:
                print(f'Saving data to directory {self.path}...')
                features_set_train.to_csv(self.path + 'features_set_train.csv',
                                          index=False)
                features_set_test.to_csv(self.path + 'features_set_test.csv',
                                         index=False)
                print(f'Files successfully saved to directory {self.path}')

        print(
            f'The whole data-processing run took {int(time.time()-start_time)} seconds'
        )
        print(
            f'The training dataset consists of processed data for {features_set_train.shape[0]} clients and {features_set_train.shape[1]-2} generated features'
        )
        print(
            f'The test dataset consists of processed data for {features_set_test.shape[0]} clients and {features_set_test.shape[1]} generated features'
        )

        if save_files == False:
            return features_set_train, features_set_test
Example #33
    def run(self, num_iter=None, plot_stats=None, plot_period=1, history=None):
        # initialize plots
        plot_stats = plot_stats or []
        if plot_stats:
            num_plots = len(plot_stats)
            fig, axs = plt.subplots(num_plots,
                                    1,
                                    squeeze=False,
                                    figsize=(10, 5 * num_plots))
            axs = axs.ravel()

        # initialize history dict
        history = history or {}
        history = defaultdict(list, history)

        total_steps = history.get("total_steps", [0])[-1]
        total_episodes = history.get("total_episodes", [0])[-1]

        num_iter = num_iter or self.num_iter
        for i in range(num_iter):
            # Store statistics of the current update step
            stats = defaultdict(list)
            # sample a training batch
            train_batch, train_batch_stats = self.sample_batch()
            # create a dataset
            obs = train_batch[SampleBatch.OBS]
            actions_old = train_batch[SampleBatch.ACTIONS]
            action_old_log_p = train_batch[SampleBatch.ACTION_LOGP]
            advantages = train_batch[SampleBatch.ADVANTAGES].astype("float32")
            value_targets = train_batch[SampleBatch.VALUE_TARGETS].astype(
                "float32")
            old_value_pred = train_batch[SampleBatch.VF_PREDS].astype(
                "float32")
            dataset = (tf.data.Dataset.from_tensor_slices((
                obs,
                advantages,
                action_old_log_p,
                actions_old,
                value_targets,
                old_value_pred,
            )).batch(self.sgd_minibatch_size, drop_remainder=True).shuffle(1))
            for (
                    obs_batch,
                    advantage_batch,
                    action_old_log_p_batch,
                    action_old_batch,
                    value_target_batch,
                    old_value_pred_batch,
            ) in dataset:
                # if critic is not used critic loss is zero
                critic_loss = tf.constant([0])
                # update critic
                if self.use_critic:
                    critic_loss = self.train_op_critic(obs_batch,
                                                       value_target_batch,
                                                       old_value_pred_batch)
                # update actor
                policy_loss, mean_entropy, actor_loss = self.train_op_actor(
                    obs_batch, action_old_batch, action_old_log_p_batch,
                    advantage_batch)
                stats["policy_loss"].append(policy_loss.numpy().item())
                stats["mean_entropy"].append(mean_entropy.numpy().item())
                stats["actor_loss"].append(actor_loss.numpy().item())
                stats["critic_loss"].append(critic_loss.numpy().item())

            mean_stats = {k: np.mean(v) for k, v in stats.items()}
            mean_stats = dict(**mean_stats, **train_batch_stats)
            # record total steps per game and episodes per iteration
            total_steps += mean_stats["num_steps"]
            total_episodes += mean_stats["num_episodes"]
            mean_stats["total_steps"] = total_steps
            mean_stats["total_episodes"] = total_episodes

            for k, v in mean_stats.items():
                history[k].append(v)
                if self.use_tensorboard:
                    tf.summary.scalar(k, v, self.total_iters)

            if plot_stats:
                if (i + 1) % plot_period == 0:
                    for ax, stat_name in zip(axs, plot_stats):
                        ax.clear()
                        if isinstance(stat_name, str):
                            stat_name = [stat_name]
                        for s in stat_name:
                            sns.lineplot(
                                x=np.arange(len(history[s])),
                                y=history[s],
                                ax=ax,
                            )
                        ax.set_title(stat_name)
                    display.display(fig)
                    display.clear_output(wait=True)
            else:
                print(f"Iteration: {i+1}/{self.num_iter} | {mean_stats}", )

            self.total_iters += 1

        return history
Example #34
 def display(self):
     """
     This function is used to display all widgets of the UI.
     """
     clear_output()
     display(ipywidgets.VBox(self._widget_list))
Example #35
def main():
    #Lists to iterate through to switch the url year and page number
    years_url = [str(i) for i in range(2000, 2019)]
    pages = ['0', '51', '101', '151']

    #Lists that will store the movie data
    names = []
    years = []
    view_ratings = []
    genres = []
    imdb_ratings = []
    metascores = []
    poster_links = []

    numRequests = 0

    for year_url in years_url:

        for page in pages:

            url = 'https://www.imdb.com/search/title?release_date={}-01-01,{}-12-31&sort=num_votes,desc&start={}&ref_=adv_prv'.format(
                year_url, year_url, page)
            response = get(url)

            #Pause the loop anywhere from 8 to 15 seconds
            sleep(randint(8, 15))

            numRequests += 1
            print('Request: {}'.format(numRequests))
            clear_output(wait=True)

            html_soup = BeautifulSoup(response.text, 'html.parser')

            movie_containers = html_soup.find_all(
                'div', class_='lister-item mode-advanced')

            for movie in movie_containers:

                if movie.find('div', class_='ratings-metascore') is not None:
                    #Add name of movie to names list
                    name = movie.h3.a.text
                    names.append(name)

                    #Add year of movie to years list
                    year = movie.h3.find('span',
                                         class_='lister-item-year').text
                    years.append(year)

                    #Add the legal view rating to the view ratings list
                    view_rating = movie.p.span.text
                    view_ratings.append(view_rating)

                    #Add genre to the genres list
                    genre = movie.p.find('span', class_='genre').text
                    genres.append(genre)

                    #Add imdb rating to imdb ratings list
                    imdb_rating = float(movie.strong.text)
                    imdb_ratings.append(imdb_rating)

                    #Add metascore to metascores list
                    metascore = movie.find('span', class_='metascore').text
                    metascores.append(int(metascore))

                    #Add poster link to the poster links list
                    poster_link = movie.a.img.get('loadlate')
                    poster_links.append(poster_link)

    movie_data = makeDataFrame(names, years, view_ratings, genres,
                               imdb_ratings, metascores, poster_links)
    movie_data.to_csv('movie_data.csv', index=False)
Example #36
    def _run(self, _=None):
        # Get widgets state
        search_text = self.text.value
        show_url = self.show_url.value
        show_abstract = self.show_abstract.value
        n_results = int(self.n_results.value)

        # Check if this is a new search or simply new stuff to show
        if self._last_search != search_text:
            self.progress_label.value = "Searching!"

            # Search through wikipedia
            search_results = self.wikipedia.search(query=search_text)
            self._last_search = search_text

            # Get the most probable indices
            self._search_indices = search_results.index.tolist()

        # Get documents
        documents = [
            self.wikipedia.documents[val]
            for val in self._search_indices[:n_results]
        ]

        # Output table
        titles = [doc.title for doc in documents]
        if all(["Wikipedia: " in val for val in titles]):
            titles = [val[len("Wikipedia: "):] for val in titles]
        table = [titles]
        header = ["title"]

        # Add content
        if show_url:
            table.append([doc.url for doc in documents])
            header.append("URL")
        if show_abstract:
            table.append([doc.abstract for doc in documents])
            header.append("Abstract")

        # Transpose
        table = list(zip(*table))

        # Make dataframe
        self.table = pd.DataFrame(table,
                                  columns=header,
                                  index=list(range(1,
                                                   len(table) + 1)))

        def make_hyperlink(val):
            return '<a href="{}" rel="noopener noreferrer" target="_blank">{}</a>'.format(
                val, val)

        # Set table style and use 1-indexing
        styles = [
            dict(selector="th",
                 props=[("text-align", "left"), ("font-size", "120%")]),
            dict(selector="td", props=[("text-align", "left")]),
        ]
        table_display = self.table.style.set_table_styles(styles) \
            .format({'URL': make_hyperlink})

        # Clear output and show widgets + results
        clear_output()
        # noinspection PyTypeChecker
        display(self.dashboard)
        # noinspection PyTypeChecker
        display(table_display)
        self.table = self.table

        # Update is done
        self.progress_label.value = ""
Example #37
    def process_and_plot(self):
        """Process data and plot it"""
        self.logger.debug('At')

        x = self.view.viz_ddn_plot_xaxis.value
        y = self.view.viz_ddn_plot_yaxis.value
        pivot = self.view.viz_ddn_plot_pivot.value
        fill = self.view.viz_ddn_plot_fill.value
        harm_row = self.view.viz_ddn_plot_harm_row.value
        harm_col = self.view.viz_ddn_plot_harm_col.value

        # Specify numeric axis(es)
        numeric_xy = (x == F_VAL or x == F_YER, y == F_VAL or y == F_YER)

        # Plot will be based on model's "processed" data
        self.model.init_processed()

        # Clear pivot table data
        with self.view.viz_out_plot_data:
            clear_output(wait=True)

        self.model.pivot(x, pivot, y, self.view.viz_ddn_plot_aggfunc.value)

        # Fill missing values (interpolate)?
        if not fill == NONE_ITEM:
            self.model.fill(fill)

        # Index to year?

        indexed_by = None

        if self.view.viz_ckb_plot_index.value:

            if x == F_YER and not harm_row == NONE_ITEM:
                indexed_by = harm_row
            elif y == F_YER and not harm_col == NONE_ITEM:
                self.model.index(harm_col, on_row=False)
                indexed_by = harm_col

            if indexed_by is not None:
                self.model.index(indexed_by)

        # Harmonize?

        harmonized = False

        if (not harm_row == NONE_ITEM) and (not harm_col == NONE_ITEM):
            self.model.harmonize(harm_row, harm_col)
            harmonized = True

        # Title

        title = y + ' for ' + pivot + ' by ' + x

        if indexed_by is not None:
            if harmonized:
                title += ', Harmonized: '

                if indexed_by == harm_row:
                    title += str(harm_col)
                else:
                    title += str(harm_row)

            title += ', Index: ' + str(indexed_by) + '=100'

        elif harmonized:
            title += ', Harmonized: ' + str(harm_row) + ', ' + str(harm_col)

        self.model.dropna()

        # Show plot data
        with self.view.viz_out_plot_data:
            self.model.set_disp(self.model.processed, wide=True)
            clear_output(wait=True)
            display(self.model.processed)

        # Draw plot based on processed data
        self.draw_plot(title, x, y, numeric_xy)
Example #38
def main():
    url = 'https://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false&isSchoolJob=0'

    headers = {
        'Host': 'www.lagou.com',
        'User-Agent':
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Language': 'zh-CN,en-US;q=0.7,en;q=0.3',
        'Accept-Encoding': 'gzip, deflate, br',
        'Referer':
        'https://www.lagou.com/jobs/list_Python?labelWords=&fromSearch=true&suginput=',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'X-Requested-With': 'XMLHttpRequest',
        'X-Anit-Forge-Token': 'None',
        'X-Anit-Forge-Code': '0',
        'Content-Length': '26',
        # 'Cookie': 'user_trace_token=20171103191801-9206e24f-9ca2-40ab-95a3-23947c0b972a; _ga=GA1.2.545192972.1509707889; LGUID=20171103191805-a9838dac-c088-11e7-9704-5254005c3644; JSESSIONID=ABAAABAACDBABJB2EE720304E451B2CEFA1723CE83F19CC; _gat=1; LGSID=20171228225143-9edb51dd-ebde-11e7-b670-525400f775ce; PRE_UTM=; PRE_HOST=www.baidu.com; PRE_SITE=https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DKkJPgBHAnny1nUKaLpx2oDfUXv9ItIF3kBAWM2-fDNu%26ck%3D3065.1.126.376.140.374.139.129%26shh%3Dwww.baidu.com%26sht%3Dmonline_3_dg%26wd%3D%26eqid%3Db0ec59d100013c7f000000055a4504f6; PRE_LAND=https%3A%2F%2Fwww.lagou.com%2F; LGRID=20171228225224-b6cc7abd-ebde-11e7-9f67-5254005c3644; index_location_city=%E5%85%A8%E5%9B%BD; TG-TRACK-CODE=index_search; SEARCH_ID=3ec21cea985a4a5fa2ab279d868560c8',
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
    }

    cookies = {
        'Cookie':
        'user_trace_token=20180309150141-1071c3b86d9340849221995337e696ab;'
        '_ga=GA1.2.26456326.1520578906; _gid=GA1.2.1110652441.1520578906; '
        'LGUID=20180309150146-bb3feaf5-2367-11e8-a4d0-525400f775ce; _gat=1; '
        'LGSID=20180309220646-1a31dfca-23a3-11e8-a7fa-525400f775ce; '
        'PRE_UTM=m_cf_cpt_baidu_pc; '
        'PRE_HOST=www.baidu.com; PRE_SITE=https%3A%2F%2Fwww.baidu.com%2Fs%3Fwd%3D%25E6%258B%2589%25E5%258B%25BE%25E7%25BD%2591%26rsv_spt%3D1%26rsv_iqid%3D0xc5bd295800005d78%26issp%3D1%26f%3D8%26rsv_bp%3D0%26rsv_idx%3D2%26ie%3Dutf-8%26tn%3Dbaiduhome_pg%26rsv_enter%3D1%26rsv_sug3%3D4%26rsv_sug1%3D4%26rsv_sug7%3D100; '
        'PRE_LAND=https%3A%2F%2Fwww.lagou.com%2Flp%2Fhtml%2Fcommon.html%3Futm_source%3Dm_cf_cpt_baidu_pc; '
        'JSESSIONID=ABAAABAAADEAAFIFD520A39CCC6F8DB23972BE849E096FD; '
        'Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1520578906,1520604405,1520604412; '
        'index_location_city=%E5%85%A8%E5%9B%BD; hideSliderBanner20180305WithTopBannerC=1; '
        'TG-TRACK-CODE=index_search; SEARCH_ID=00df79401e894f288caf5da82944a99d; '
        'Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1520604432; '
        'LGRID=20180309220712-2a00fa16-23a3-11e8-b1a7-5254005c3644',
    }

    data = {
        'first': 'false',
        'pn': 1,
        'kd': 'python',
    }

    page = get_page(url, cookies, headers, data)
    page_num = get_page_num(page)
    pages = [i for i in range(1, page_num)]

    start_time = time.time()
    # moniter parameter
    requests = 0

    for page in pages:
        # a request would go here
        print('Scraping page {}'.format(page))
        data['pn'] = page
        html = get_page(url, cookies, headers, data)
        time.sleep(random.randint(5, 8))

        requests += 1
        elapsed_time = time.time() - start_time
        print('Request: {}; Frequency: {} requests/s'.format(
            requests, requests / elapsed_time))
        clear_output(wait=True)
        contents = parse_job(html)
        save_to_file(contents)
Example #39
def show_record(sender):
    '''Displays wins and losses; can only be called at the end of a round.'''
    global wins, losses, submit_flag
    if not submit_flag:
        clear_output()
        print('wins: {}\nlosses:{}'.format(wins, losses))
Example #40
 def print_stats(self) -> None:
     clear_output()
     self.display_cp()
     self.print_graphs()
Example #41
    def run_agent(self, render: bool = False, print_swarm: bool = False):
        """

        :param render: whether to render the environment at each step.
        :param print_swarm: whether to print the swarm state at each step.
        :return: None
        """

        self.tree.reset()
        i_step, self._agent_reward, end = 0, 0, False
        self._save_steps = []
        # Clone emulator state w/ system state including pseudorandomness.
        # Restoring this state will give an identical environment.
        state, obs = self._env.reset(return_state=True)
        self.tree.append_leaf(i_step,
                              parent_id=i_step - 1,
                              state=state,
                              action=0,
                              dt=1)
        reward_sum = 0

        net = dqn_model.DQN(self._env.observation_space.shape,
                            self._env.action_space.n).to(device)
        tgt_net = dqn_model.DQN(self._env.observation_space.shape,
                                self._env.action_space.n).to(device)
        buffer = dqn_agent.ExperienceBuffer(REPLAY_SIZE)
        agent = dqn_agent.Agent(self._env, buffer)
        optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE)

        current_obs = obs
        start_time = time.time()

        while not end and self._agent_reward < self.reward_limit:
            i_step += 1

            self.run_swarm(state=state.copy(), obs=obs)
            action = self.weight_actions()

            state, obs, _reward, _end, info = self._env.step(
                state=state, action=action, n_repeat_action=self.min_dt)

            # if not _end:
            #     next_obs = obs
            # else:
            #     next_obs = None
            #
            next_obs = obs

            #print("come here")

            exp = Experience(current_obs, action, _reward, _end, next_obs)
            current_obs = next_obs
            agent.exp_buffer.append(exp)

            reward_sum += _reward
            self.tree.append_leaf(i_step,
                                  parent_id=i_step - 1,
                                  state=state,
                                  action=action,
                                  dt=self._env.n_repeat_action)
            self._agent_reward += _reward
            self._last_action = action
            end = info.get("terminal", _end)

            #if _reward != 0:
            #    print("i_step,_reward,_end", i_step, _reward, _end)
            if _end:
                print('ep %d: game over. episode reward total was %f' %
                      (i_step, reward_sum))

            if render:
                self._env.render()
            if print_swarm:
                print(self)
                clear_output(True)
            self.update_parameters()

        # train dqn model
        print("experiences exploration time/seconds:",
              time.time() - start_time)
        print("**************dqn agent training...*******************")
        #num_episodes = 1000
        num_episodes = 10000
        reward_sum = 0
        start_time = time.time()
        for i_episode in range(num_episodes):
            optimizer.zero_grad()
            batch = agent.exp_buffer.sample(BATCH_SIZE)
            loss_t = dqn_agent.calc_loss(batch, net, tgt_net, device=device)
            loss_t.backward()
            optimizer.step()
        print("Train time/seconds:", time.time() - start_time)
        print("#############dqn agent testing...############")
        env = gym.make('Pong-v0')
        current_obs = env.reset()
        start_time = time.time()
        while True:
            state_a = np.array([current_obs], copy=False)
            state_v = torch.tensor(state_a, dtype=torch.float).to(device)
            q_vals_v = net(state_v)
            _, act_v = torch.max(q_vals_v, dim=1)
            action = int(act_v.item())
            new_state, _reward, _end, _ = env.step(action)
            current_obs = new_state

            #if _reward != 0:
            #    print("_reward,_end", _reward, _end)
            reward_sum += _reward
            if _end:
                print('game over. reward total was %f' % reward_sum)
                break
        print("Test time:/seconds:", time.time() - start_time)
        print("################test over##########################")
Example #42
def on_button_update(_):
    clear_output()
    #display(Javascript('''var c = IPython.notebook.get_selected_index();
    #IPython.notebook.execute_cells([c])'''))
    add_attachment()
Example #43
    def train(self, env, q_table
              ):  #This function updates the Q table using Bellman's equation

        #These are the hyperparameters
        alpha = .1
        gamma = .6
        epsilon = .1

        #For plotting metrics
        all_epochs = []
        all_penalties = []
        total_episodes = []

        print("Training Started")
        for i in range(1, 100001):
            state = env.reset()

            epochs, penalties, reward = 0, 0, 0
            done = False

            while not done:
                if np.random.uniform(0, 1) < epsilon:
                    action = env.action_space.sample()  # Explore action space
                else:
                    action = np.argmax(
                        q_table[state])  # Exploit learned values

                next_state, reward, done, info = env.step(action)

                old_value = q_table[state, action]
                next_max = np.max(q_table[next_state])

                new_value = (1 - alpha) * old_value + alpha * (
                    reward + gamma * next_max
                )  #The state value function which updates the Q value
                q_table[state, action] = new_value

                if reward == -10:
                    penalties += 1

                state = next_state
                epochs += 1

            all_epochs.append(epochs)
            all_penalties.append(penalties)
            if i % 100 == 0:
                clear_output(wait=True)
                print(f"Episode: {i}")

                total_episodes.append(i)

        all_epochs = all_epochs[:1000]
        all_penalties = all_penalties[:1000]

        #Plotting the STEPS vs EPISODES graph
        sns.lineplot(total_episodes, all_epochs)
        plt.xlabel("Episode")
        plt.ylabel("Steps")
        plt.show()

        # Plotting the PENALTIES vs EPISODES graph
        sns.lineplot(total_episodes, all_penalties)
        plt.xlabel("Episode")
        plt.ylabel("penalties")
        plt.show()

        print("Training finished.\n")
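A hedged setup sketch for the method above, assuming a discrete Gym environment such as Taxi-v3; the zero-initialized Q table matches the q_table[state, action] indexing used in train() (`agent` here stands for whatever object owns the method):

import gym
import numpy as np

env = gym.make("Taxi-v3")
q_table = np.zeros([env.observation_space.n, env.action_space.n])
agent.train(env, q_table)  # fills q_table in place over 100,000 episodes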
Example #44
 def emit(self, s):
     clear_output(wait=True)
     print(s.getMessage())
Example #45
        if movie.find('span', class_='metascore') is not None:
            # print('movie')
            movie_name = movie.find('div', class_='lister-item-content')
            # print(movie_name)
            names.append(movie_name.h3.a.text)
            # print(movie_name.strong.text)
            ratings.append(str(movie_name.strong.text))
            year = movie_name.h3.find('span', class_='lister-item-year text-muted unbold').text
            # print(year)
            years.append(year)
            metascore = movie_name.find('div', class_='ratings-bar')
            metascore = metascore.find('div', class_='inline-block ratings-metascore')
            # print(metascore.span.text)
            metascores.append(str(metascore.span.text))
            vote = movie_name.find('span', attrs={'name':'nv'})['data-value']
            # print(int(vote))
            votes.append(vote)
    clear_output(wait=True)  # wait for fresh output before clearing, so the next request's progress replaces this one


table_df = pd.DataFrame({'movie': names,
                         'year': years,
                         'imdb': ratings,
                         'metascore': metascores,
                         'votes': votes})
table_df.loc[:, 'year'] = table_df['year'].str[-5:-1].astype(int)
table_df.to_csv('imdb_ratings.csv')
print("Done!")

Example #46
def get_article_links(start_year, end_year, num_pages):

    """Collect and save a list of links that correspond to
    articles from The Dartmouth daily newspaper.

    Parameters
    ----------
    start_year : int
        Year in the archives to start looking for articles
    end_year : int
        Year in the archives to stop looking for articles
    num_pages : int
        Number of archive pages that fit each year range to
        inspect

    Returns
    -------
    text file
        A text file ('article_links.txt') that contains the
        link to every article that appears in the specified
        year and page range.

    """

    # change range to 0,50 when using cluster
    pages = list(map(str, range(num_pages)))

    # change range to 2012,2019 when using cluster
    year_urls = list(range(start_year, end_year))

    article_links = []

    start_time = time()
    requests = 0

    for year_url in year_urls:

        for page in pages:

            html_page = urllib.request.urlopen(
                "http://www.thedartmouth.com/search?order"
                "=date&direction=desc&begin={}0101&end="
                "{}1231&page={}&per_page=20".format(year_url, year_url, page))

            # html_page = urllib.request.urlopen(
            #    "http://www.thedartmouth.com/search?q=0&page={}&"
            #    "ti=0&tg=0&ty=0&ts_month=0&ts_day=0&ts_year={}&"
            #    "te_month=0&te_day=0&te_year={}&s=0&au=0&o=0&"
            #    "a=1".format(page, year_url, year_url+1))

            # Pause the loop
            sleep(randint(8, 15))

            # Monitor the requests
            requests += 1
            elapsed_time = time() - start_time
            print('Request:{}; Frequency: {} requests/s'.format(
                requests, requests / elapsed_time))
            clear_output(wait=True)

            soup = BeautifulSoup(html_page, 'html.parser')

            links = []
            for link in soup.find_all('a'):
                links.append(link.get('href'))

            for link in links:
                if link not in article_links:
                    if 'article' in link.split('/'):
                        article_links.append(link)

    file = open('article_links.txt', 'w')
    for article_link in article_links:
        file.write("{}\n".format(article_link))
    file.close()
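An example invocation matching the docstring and the in-code comments about cluster ranges (note that range(start_year, end_year) excludes end_year):

get_article_links(2012, 2019, 50)  # 2012-2018 archives, 50 pages per year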
Example #47
    def collect_data(self, render: bool = False, print_swarm: bool = False):
        """

        :param render: whether to render the environment at each step.
        :param print_swarm: whether to print the swarm state at each step.
        :return: None
        """

        self.tree.reset()
        self.env.reset()
        state, obs = self.data_env.reset(return_state=True)
        i_step, self._agent_reward, end = 0, 0, False
        _end = False
        for i in range(self.skip_initial_frames):
            i_step += 1
            action = 0

            state, obs, _reward, _end, info = self.data_env.step(
                state=state, action=action, n_repeat_action=1
            )
            self.tree.append_leaf(
                i_step,
                parent_id=i_step - 1,
                state=state,
                action=np.ones(self.env.n_actions),
                dt=1,
                reward=np.ones(self.env.n_actions),
                terminal=bool(info["terminal"]),
                obs=obs,
            )

            self._agent_reward += _reward
            self._last_action = action
            end = info.get("terminal", _end)
            if end:
                break
        self._save_steps = []

        while not end and self._agent_reward < self.reward_limit:
            i_step += 1
            self.run_swarm(state=copy.deepcopy(state), obs=obs)
            action_dist, reward_dist = self.estimate_distributions(state=state, obs=obs)
            action = (action_dist + reward_dist).argmax()
            state, obs, _reward, _end, info = self.data_env.step(
                state=state, action=action, n_repeat_action=1
            )
            self.tree.append_leaf(
                i_step,
                parent_id=i_step - 1,
                state=state,
                action=action_dist,
                dt=1,
                reward=reward_dist,
                terminal=bool(_end),
                obs=obs,
            )

            self._agent_reward += _reward
            self._last_action = action
            end = info.get("terminal", _end)
            self._best_id = i_step

            if render:
                self.data_env.render()
            if print_swarm:
                from IPython.core.display import clear_output

                print(self)
                clear_output(True)
            if self._update_parameters:
                self.update_parameters()
Beispiel #48
0
def refresh_and_print(iterator_output):
    clear_output(wait=True)
    n, viz = iterator_output
    print(n, viz, sep='\n')
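refresh_and_print expects a (step, visualization) pair, so it pairs naturally with enumerate; a minimal usage sketch:

# Each iteration replaces the previous cell output with the current frame.
for iterator_output in enumerate(['frame-0', 'frame-1', 'frame-2']):
    refresh_and_print(iterator_output)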
    def update(self, final=False):
        if IPY:
            clear_output(wait=True)
            display(HTML(self.__html__(final)))
        else:
            print(self.__str__())
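The update method above relies on a module-level IPY flag that says whether the code runs inside IPython; a minimal sketch of how such a flag is commonly derived (an assumption, not this project's actual code):

try:
    from IPython.display import HTML, clear_output, display
    get_ipython()  # defined only inside an IPython/Jupyter session
    IPY = True
except (ImportError, NameError):
    IPY = False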
Beispiel #50
0
    for page in pages:

        url = "https://www.elespectador.com/search/" + keyword + "?page=" + page
        print(url)
        html = get(url)
        htmlsoup = soup(html.content, 'html.parser')
        time.sleep(randint(2, 4))
        requests += 1
        count += 1
        elapsed_time = time.time() - start_time
        print("")
        print('Keyword: {}; Page: {}; Time: {} min'.format(
            keyword, page, round(elapsed_time / 60, 3)))
        print("")
        clear_output(wait=True)
        articles = htmlsoup.find_all(
            'div',
            class_=
            "node-title field field--name-title field--type-ds field--label-hidden"
        )
        if not articles:
            print("There were no more articles found with your keyword")
            break

        else:
            for oneArticle in articles:
                title = oneArticle.a.text.strip()
                link = oneArticle.a['href']
                content = ''
                url2 = "http://www.elespectador.com" + link
def evolve(central_mass, num_threads, length, length_units, resol, duration,
           duration_units, step_factor, save_number, save_options, save_path,
           npz, npy, hdf5, s_mass_unit, s_position_unit, s_velocity_unit,
           solitons, start_time):
    print('Initialising...')

    ##########################################################################################
    #SET INITIAL CONDITIONS

    if (length_units == ''):
        gridlength = length
    else:
        gridlength = convert(length, length_units, 'l')
    if (duration_units == ''):
        t = duration
    else:
        t = convert(duration, duration_units, 't')
    if (duration_units == ''):
        t0 = start_time
    else:
        t0 = convert(start_time, duration_units, 't')
    if (s_mass_unit == ''):
        cmass = central_mass
    else:
        cmass = convert(central_mass, s_mass_unit, 'm')

    Vcell = (gridlength / float(resol))**3

    ne.set_num_threads(num_threads)

    initsoliton_jit = numba.jit(initsoliton)

    ##########################################################################################
    # CREATE THE TIMESTAMPED SAVE DIRECTORY AND CONFIG.TXT FILE

    save_path = os.path.expanduser(save_path)
    tm = time.localtime()

    talt = ['0', '0', '0']
    for i in range(3, 6):
        if tm[i] in range(0, 10):
            talt[i - 3] = '{}{}'.format('0', tm[i])
        else:
            talt[i - 3] = tm[i]
    timestamp = '{}{}{}{}{}{}{}{}{}{}{}{}{}'.format(tm[0], '.', tm[1], '.',
                                                    tm[2], '_', talt[0], ':',
                                                    talt[1], ':', talt[2], '_',
                                                    resol)
    file = open('{}{}{}'.format('./', save_path, '/timestamp.txt'), "w+")
    file.write(timestamp)
    file.close()
    os.makedirs('{}{}{}{}'.format('./', save_path, '/', timestamp))
    file = open(
        '{}{}{}{}{}'.format('./', save_path, '/', timestamp, '/config.txt'),
        "w+")
    file.write(('{}{}'.format('resol = ', resol)))
    file.write('\n')
    file.write(('{}{}'.format('axion_mass (kg) = ', axion_mass)))
    file.write('\n')
    file.write(('{}{}'.format('length (code units) = ', gridlength)))
    file.write('\n')
    file.write(('{}{}'.format('duration (code units) = ', t)))
    file.write('\n')
    file.write(('{}{}'.format('start_time (code units) = ', t0)))
    file.write('\n')
    file.write(('{}{}'.format('step_factor  = ', step_factor)))
    file.write('\n')
    file.write(('{}{}'.format('central_mass (code units) = ', cmass)))
    file.write('\n\n')
    file.write(
        ('{}'.format('solitons ([mass, [x, y, z], [vx, vy, vz], phase]): \n')))
    for s in range(len(solitons)):
        file.write(('{}{}{}{}{}'.format('soliton', s, ' = ', solitons[s],
                                        '\n')))
    file.write(
        ('{}{}{}{}{}{}'.format('\ns_mass_unit = ', s_mass_unit,
                               ', s_position_unit = ', s_position_unit,
                               ', s_velocity_unit = ', s_velocity_unit)))
    file.write(
        '\n\nNote: If the above units are blank, this means that the soliton parameters were specified in code units'
    )
    file.close()

    loc = save_path + '/' + timestamp

    ##########################################################################################
    # SET UP THE REAL SPACE COORDINATES OF THE GRID

    gridvec = np.linspace(-gridlength / 2.0 + gridlength / float(2 * resol),
                          gridlength / 2.0 - gridlength / float(2 * resol),
                          resol)
    xarray, yarray, zarray = np.meshgrid(
        gridvec,
        gridvec,
        gridvec,
        sparse=True,
        indexing='ij',
    )
    distarray = ne.evaluate(
        "(xarray**2+yarray**2+zarray**2)**0.5")  # Radial coordinates

    ##########################################################################################
    # SET UP K-SPACE COORDINATES FOR COMPLEX DFT (NOT RHO DFT)

    kvec = 2 * np.pi * np.fft.fftfreq(resol, gridlength / float(resol))
    kxarray, kyarray, kzarray = np.meshgrid(
        kvec,
        kvec,
        kvec,
        sparse=True,
        indexing='ij',
    )
    karray2 = ne.evaluate("kxarray**2+kyarray**2+kzarray**2")

    ##########################################################################################
    # INITIALISE SOLITONS WITH SPECIFIED MASS, POSITION, VELOCITY, PHASE

    f = np.load('./Soliton Profile Files/initial_f.npy')

    delta_x = 0.00001  # Needs to match resolution of soliton profile array file. Default = 0.00001

    warn = 0

    psi = pyfftw.zeros_aligned((resol, resol, resol), dtype='complex128')
    funct = pyfftw.zeros_aligned((resol, resol, resol), dtype='complex128')

    for k in range(len(solitons)):
        if (k != 0):
            # Keep the flag sticky: one overlapping pair is enough to warn.
            if (not overlap_check(solitons[k], solitons[:k])):
                warn = 1

    for s in solitons:
        mass = convert(s[0], s_mass_unit, 'm')
        position = convert(np.array(s[1]), s_position_unit, 'l')
        velocity = convert(np.array(s[2]), s_velocity_unit, 'v')
        # Note that alpha and beta parameters are computed when the initial_f.npy soliton profile file is generated.
        alpha = (mass / 3.883)**2
        beta = 2.454
        phase = s[3]
        funct = initsoliton_jit(funct, xarray, yarray, zarray, position, alpha,
                                f, delta_x)
        ####### Impart velocity to solitons in Galilean invariant way
        velx = velocity[0]
        vely = velocity[1]
        velz = velocity[2]
        funct = ne.evaluate(
            "exp(1j*(alpha*beta*t0 + velx*xarray + vely*yarray + velz*zarray -0.5*(velx*velx+vely*vely+velz*velz)*t0  + phase))*funct"
        )
        psi = ne.evaluate("psi + funct")

    rho = ne.evaluate("real(abs(psi)**2)")

    fft_psi = pyfftw.builders.fftn(psi, axes=(0, 1, 2), threads=num_threads)
    ifft_funct = pyfftw.builders.ifftn(funct,
                                       axes=(0, 1, 2),
                                       threads=num_threads)

    ##########################################################################################
    # COMPUTE SIZE OF TIMESTEP (CAN BE INCREASED WITH step_factor)

    delta_t = (gridlength / float(resol))**2 / np.pi
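    # Note: with the kinetic half-step exp(-1j*0.5*h*karray2) used below and
    # k_max = pi*resol/gridlength, this delta_t makes the per-step kinetic
    # phase 0.5*delta_t*k_max**2 equal to pi/2, i.e. well below a full winding.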

    min_num_steps = t / delta_t
    min_num_steps_int = int(min_num_steps + 1)
    min_num_steps_int = int(min_num_steps_int / step_factor)

    if save_number >= min_num_steps_int:
        actual_num_steps = save_number
        its_per_save = 1
    else:
        rem = min_num_steps_int % save_number
        actual_num_steps = min_num_steps_int + save_number - rem
        its_per_save = actual_num_steps / save_number

    h = t / float(actual_num_steps)

    ##########################################################################################
    # SETUP K-SPACE FOR RHO (REAL)

    rkvec = 2 * np.pi * np.fft.fftfreq(resol, gridlength / float(resol))
    krealvec = 2 * np.pi * np.fft.rfftfreq(resol, gridlength / float(resol))
    rkxarray, rkyarray, rkzarray = np.meshgrid(rkvec,
                                               rkvec,
                                               krealvec,
                                               sparse=True,
                                               indexing='ij')

    rkarray2 = ne.evaluate("rkxarray**2+rkyarray**2+rkzarray**2")

    rfft_rho = pyfftw.builders.rfftn(rho, axes=(0, 1, 2), threads=num_threads)
    phik = rfft_rho(rho)  # not actually phik but phik is defined in next line
    phik = ne.evaluate("-4*3.141593*phik/rkarray2")
    phik[0, 0, 0] = 0
    irfft_phi = pyfftw.builders.irfftn(phik,
                                       axes=(0, 1, 2),
                                       threads=num_threads)

    ##########################################################################################
    # COMPUTE INITIAL VALUE OF POTENTIAL

    phisp = pyfftw.zeros_aligned((resol, resol, resol), dtype='float64')
    phisp = irfft_phi(phik)
    phisp = ne.evaluate("phisp-(cmass)/distarray")

    ##########################################################################################
    # PRE-LOOP ENERGY CALCULATION

    if (save_options[3]):
        egylist = []
        egpcmlist = []
        egpsilist = []
        ekandqlist = []
        mtotlist = []

        calculate_energies(
            save_options,
            resol,
            psi,
            cmass,
            distarray,
            Vcell,
            phisp,
            karray2,
            funct,
            fft_psi,
            ifft_funct,
            egpcmlist,
            egpsilist,
            ekandqlist,
            egylist,
            mtotlist,
        )

    ##########################################################################################
    # PRE-LOOP SAVE I.E. INITIAL CONFIG
    save_grid(
        rho,
        psi,
        resol,
        save_options,
        npy,
        npz,
        hdf5,
        loc,
        -1,
        1,
    )

    ##########################################################################################
    # LOOP NOW BEGINS

    halfstepornot = 1  # 1 for a half step 0 for a full step

    tenth = float(
        save_number / 10
    )  #This parameter is used if energy outputs are saved while code is running.
    # See commented section below (line 585)

    clear_output()
    print("The total number of steps is %.0f" % actual_num_steps)
    if warn == 1:
        print(
            "WARNING: Significant overlap between solitons in initial conditions"
        )
    print('\n')
    tinit = time.time()

    for ix in range(actual_num_steps):
        if halfstepornot == 1:
            psi = ne.evaluate("exp(-1j*0.5*h*phisp)*psi")
            halfstepornot = 0
        else:
            psi = ne.evaluate("exp(-1j*h*phisp)*psi")
        funct = fft_psi(psi)
        funct = ne.evaluate("funct*exp(-1j*0.5*h*karray2)")
        psi = ifft_funct(funct)
        rho = ne.evaluate("real(abs(psi)**2)")
        phik = rfft_rho(
            rho)  # not actually phik but phik is defined on next line
        phik = ne.evaluate("-4*3.141593*(phik)/rkarray2")
        phik[0, 0, 0] = 0
        phisp = irfft_phi(phik)
        phisp = ne.evaluate("phisp-(cmass)/distarray")

        #Next if statement ensures that an extra half step is performed at each save point
        if (((ix + 1) % its_per_save) == 0) and halfstepornot == 0:
            psi = ne.evaluate("exp(-1j*0.5*h*phisp)*psi")
            rho = ne.evaluate("real(abs(psi)**2)")
            halfstepornot = 1

            #Next block calculates the energies at each save, not at each timestep.
            if (save_options[3]):
                calculate_energies(
                    save_options,
                    resol,
                    psi,
                    cmass,
                    distarray,
                    Vcell,
                    phisp,
                    karray2,
                    funct,
                    fft_psi,
                    ifft_funct,
                    egpcmlist,
                    egpsilist,
                    ekandqlist,
                    egylist,
                    mtotlist,
                )

            #Uncomment next section if partially complete energy lists desired as simulation runs.
            #In this way, some energy data will be saved even if the simulation is terminated early.

            # if (save_options[3]):
            #     if (ix+1) % tenth == 0:
            #         label = (ix+1)/tenth
            #         file_name = "{}{}".format(label,'egy_cumulative.npy')
            #         np.save(os.path.join(os.path.expanduser(loc), file_name), egylist)
            #         file_name = "{}{}".format(label,'egpcm_cumulative.npy')
            #         np.save(os.path.join(os.path.expanduser(loc), file_name), egpcmlist)
            #         file_name = "{}{}".format(label,'egpsi_cumulative.npy')
            #         np.save(os.path.join(os.path.expanduser(loc), file_name), egpsilist)
            #         file_name = "{}{}".format(label,'ekandq_cumulative.npy')
            #         np.save(os.path.join(os.path.expanduser(loc), file_name), ekandqlist)

        ################################################################################
        # SAVE DESIRED OUTPUTS
        if ((ix + 1) % its_per_save) == 0:

            save_grid(
                rho,
                psi,
                resol,
                save_options,
                npy,
                npz,
                hdf5,
                loc,
                ix,
                its_per_save,
            )

        ################################################################################
        # UPDATE INFORMATION FOR PROGRESS BAR

        tint = time.time() - tinit
        tinit = time.time()
        prog_bar(actual_num_steps, ix + 1, tint)

    ################################################################################
    # LOOP ENDS

    clear_output()
    print('\n')
    print("Complete.")
    if warn == 1:
        print(
            "WARNING: Significant overlap between solitons in initial conditions"
        )

    if (save_options[3]):
        file_name = "egylist.npy"
        np.save(os.path.join(os.path.expanduser(loc), file_name), egylist)
        file_name = "egpcmlist.npy"
        np.save(os.path.join(os.path.expanduser(loc), file_name), egpcmlist)
        file_name = "egpsilist.npy"
        np.save(os.path.join(os.path.expanduser(loc), file_name), egpsilist)
        file_name = "ekandqlist.npy"
        np.save(os.path.join(os.path.expanduser(loc), file_name), ekandqlist)
        file_name = "masslist.npy"
        np.save(os.path.join(os.path.expanduser(loc), file_name), mtotlist)
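The main loop above implements a kick-drift-kick split-step scheme: a half kick exp(-1j*0.5*h*phisp) in real space, a full kinetic drift exp(-1j*0.5*h*karray2) in Fourier space, then the potential is refreshed from rho via the Poisson solve; the halfstepornot flag fuses the trailing and leading half kicks of consecutive steps into single full kicks, except at save points. A self-contained 1D toy sketch of the same scheme with a fixed external potential (an illustration, not the author's code):

import numpy as np

resol, length, h = 64, 10.0, 1e-3
x = np.linspace(-length / 2, length / 2, resol, endpoint=False)
k = 2 * np.pi * np.fft.fftfreq(resol, d=length / resol)
psi = np.exp(-x**2).astype(complex)  # toy initial wavefunction
phi = 0.5 * x**2  # fixed toy potential; evolve instead recomputes phisp from rho
for _ in range(100):
    psi = np.exp(-1j * 0.5 * h * phi) * psi  # half kick (real space)
    psi = np.fft.ifft(np.exp(-1j * 0.5 * h * k**2) * np.fft.fft(psi))  # drift
    psi = np.exp(-1j * 0.5 * h * phi) * psi  # half kick (real space)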
Beispiel #52
0
    def run(self,
            num_episodes=None,
            plot_stats=None,
            plot_period=1,
            history=None):
        # initialize plots
        plot_stats = plot_stats or []
        if plot_stats:
            num_plots = len(plot_stats)
            fig, axs = plt.subplots(num_plots,
                                    1,
                                    squeeze=False,
                                    figsize=(10, 5 * num_plots))
            axs = axs.ravel()

        # initialize history dict
        history = history or {}
        history = defaultdict(list, history)

        num_episodes = num_episodes or self.num_episodes
        for i in range(num_episodes):
            # Keep track of current episode statistics
            stats = {}
            # sample a trajectory
            train_batch, score = self.sample_trajectory()

            # Get data from the training batch
            obs = train_batch[SampleBatch.OBS]
            actions_old = train_batch[SampleBatch.ACTIONS]
            returns = train_batch[SampleBatch.RETURNS].astype(
                "float32").squeeze()
            # normalize returns on the batch level
            if self.standardize_returns:
                returns = np_standardized(returns)
            # perform gradient descent
            policy_loss, total_loss, entropy_bonus = self.train_op(
                obs, actions_old, returns)
            # record statistics
            stats["policy_loss"] = policy_loss.numpy().item()
            stats["entropy"] = entropy_bonus.numpy().item()
            stats["total_loss"] = total_loss.numpy().item()
            stats["score"] = score
            stats["steps_per_episode"] = len(train_batch)

            for k, v in stats.items():
                history[k].append(v)
                if self.use_tensorboard:
                    tf.summary.scalar(k, v, self.total_episodes)

            self.total_episodes += 1

            if plot_stats:
                if (i + 1) % plot_period == 0:
                    for ax, stat_name in zip(axs, plot_stats):
                        ax.clear()
                        # print(stat_name, len(history[stat_name]))

                        sns.lineplot(
                            x=np.arange(len(history[stat_name])),
                            y=history[stat_name],
                            ax=ax,
                        )

                        ax.set_title(stat_name)
                    display.display(fig)
                    display.clear_output(wait=True)

            else:
                print(f"episode {i}/{self.num_episodes} | {stats}", )

        return history
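np_standardized is referenced above but not shown in this excerpt; a plausible minimal version (an assumption, not necessarily the original helper):

import numpy as np

def np_standardized(x, eps=1e-8):
    # Shift returns to zero mean and unit standard deviation for stabler
    # policy-gradient updates; eps guards against division by zero.
    x = np.asarray(x, dtype=np.float32)
    return (x - x.mean()) / (x.std() + eps)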
    def features_processing_from_purchases(self, purchases):
        '''Extract features from the purchase data, grouped by client.'''
        df_products = pd.read_csv(self.products_path).fillna('None')
        df_products.loc[df_products['netto'] == 'None', 'netto'] = 0

        all_client_n = purchases['client_id'].nunique()
        start_time_for_print = strftime("%Y-%m-%d %H:%M:%S", gmtime())
        start_time = time.time()
        if self.low_memory == True:
            dict_product = dict(
                zip(df_products['product_id'].values,
                    df_products.iloc[:, 5].values))
        else:
            dict_product = dict(
                zip(df_products['product_id'].values,
                    df_products.iloc[:, 1:6].values))
        dict_product_number = dict(
            zip(df_products['product_id'].values,
                df_products.iloc[:, -3:].values))
        print(f'Client processing started at {start_time_for_print}')
        features_product = self.features_dict(df_products, )
        features_product['netto'] = 0
        features_product['trademark'] = 0
        features_product['alhocol'] = 0
        features_product['regular_points_received'] = 0
        features_product['express_points_received'] = 0
        features_product['regular_points_spent'] = 0
        features_product['express_points_spent'] = 0
        for i in range(24):
            if i < 7:
                features_product['dayofweek_' + str(i)] = 0
            features_product['hour_' + str(i)] = 0
        points_list = ['regular_points_received', 'express_points_received',\
                       'regular_points_spent', 'express_points_spent']
        data = []

        for i, (id_c,
                client) in enumerate(purchases.groupby('client_id',
                                                       sort=False)):

            if (i + 1) % 5000 == 0:
                clear_output()
                print(f'Client processing started at {start_time_for_print}')
                print(f'Processed {i+1} of {all_client_n} clients...')
                print(f'{int(time.time()-start_time)} seconds elapsed so far')

            features = features_product.copy()
            n_trans = client['transaction_id'].nunique()
            features['transactions'] = n_trans
            features['sum'] = client['trn_sum_from_iss'].sum()
            features['trn_sum_from_red'] = client['trn_sum_from_red'].sum()
            features['n_store'] = client['store_id'].nunique()
            features['n_product'] = client['product_id'].nunique()
            features['max_price'] = client['trn_sum_from_iss'].max()
            features['min_price'] = client['trn_sum_from_iss'].min()
            features['quantity'] = client['product_quantity'].sum()
            features['first_buy_sum'] = client['purchase_sum'].iloc[0]
            features['last_buy_sum'] = client['purchase_sum'].iloc[-1]
            try:
                features['almost_last_buy'] = client['purchase_sum'].unique()[-2]
            except IndexError:
                features['almost_last_buy'] = client['purchase_sum'].unique()[0]

            features['client_id'] = client['client_id'].iloc[0]
            features['transaction_max_delay'] = self.transaction_max_delay(
                client)

            #Features from products
            count_products = Counter(client['product_id'])
            if self.low_memory == True:
                for product in count_products.keys():
                    features[
                        'segment_id_' +
                        str(dict_product[product])] += count_products[product]
            else:
                for product in count_products.keys():
                    values = dict_product[product]
                    for value in values:
                        if type(value) != str:
                            features['segment_id_' +
                                     str(value)] += count_products[product]
                        else:
                            features[value] = count_products[product]

            temp_dict_quantity = dict(
                zip(client['product_id'], client['product_quantity']))
            for product, quantity in temp_dict_quantity.items():

                features['netto'] += quantity * dict_product_number[product][0]
                features[
                    'trademark'] += quantity * dict_product_number[product][1]
                features[
                    'alhocol'] += quantity * dict_product_number[product][2]

            #Features from date
            temp_dict_date = dict(
                zip(client['transaction_id'].values,
                    client['dayofweek'].values))
            for dayofweek in temp_dict_date.values():
                features['dayofweek_' + str(dayofweek)] += 1

            temp_dict_date = dict(
                zip(client['transaction_id'].values, client['hour'].values))
            for hour in temp_dict_date.values():
                features['hour_' + str(hour)] += 1

            #Features from points
            points_dict = dict(
                zip(client['transaction_id'].values,
                    client[points_list].values))
            for point in points_dict.values():
                features['regular_points_received'] += point[0]
                features['express_points_received'] += point[1]
                features['regular_points_spent'] += point[2]
                features['express_points_spent'] += point[3]

            #Average features
            features['avg_regular_points_received'] = features[
                'regular_points_received'] / n_trans
            features['avg_express_points_received'] = features[
                'express_points_received'] / n_trans
            features['avg_regular_points_spent'] = features[
                'regular_points_spent'] / n_trans
            features['avg_express_points_spent'] = features[
                'express_points_spent'] / n_trans
            features[
                'avg_sum_from_red'] = features['trn_sum_from_red'] / n_trans
            features['avg_price_product'] = features['sum'] / n_trans
            features['avg_delay_beetwen_transc'] = features[
                'transaction_max_delay'] / n_trans
            features['avg_sum'] = features['sum'] / n_trans
            features['avg_quantity'] = features['quantity'] / n_trans
            features['avg_netto'] = features['netto'] / n_trans
            features['avg_trademark'] = features['trademark'] / n_trans
            features['avg_alhocol'] = features['alhocol'] / n_trans

            data.append(features)

        clear_output()
        print(f'Client processing started at {start_time_for_print}')
        print(
            f'Client data processing finished in {int(time.time()-start_time)} seconds'
        )

        return data
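features_processing_from_purchases returns a list of per-client feature dicts; a typical next step (a usage sketch, assuming pandas is imported as pd):

df_features = pd.DataFrame(data).set_index('client_id')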
Beispiel #54
0
    def animate(self):
        display.clear_output()
        display.display(self)
        sys.stdout.flush()
Beispiel #55
0
    def start(self):
        def startImport(b):
            b.disabled = True
            print('start import...')
            parent_dir, files = self.previous_step.data
            self.progressbar.max = int(self.sample_num.value)
            if not parent_dir.lower().endswith('.zip'):
                sampled_ids = random.sample(range(0, len(files)),
                                            self.progressbar.max)
                for i in range(0, self.progressbar.max):
                    file = files[sampled_ids[i]]
                    base_name = path.basename(file)
                    self.progressbar.value = i
                    with open(parent_dir + '/' + file) as f:
                        text = f.read()
                        if file.lower().endswith('.xml'):
                            self.loadTextFromXml(file, text, self.data)
                        else:
                            self.dataset_name = 'unknown'
                            self.data = self.data.append(
                                pd.DataFrame(
                                    [[None, base_name, text, None, None]],
                                    columns=[
                                        'BUNCH_ID', 'DOC_NAME', 'TEXT', 'DATE',
                                        'REF_DATE'
                                    ]))
                self.progressbar.value = self.progressbar.max
            else:
                sampled_ids = random.sample(range(0, len(self.file_list)),
                                            self.progressbar.max)
                for i in range(0, self.progressbar.max):
                    finfo = self.file_list[sampled_ids[i]]
                    ifile = self.zfile.open(finfo)
                    doc_text = ifile.read().decode("utf-8")
                    base_name = path.basename(finfo.filename)
                    self.progressbar.value = i
                    if finfo.filename.lower().endswith('.xml'):
                        self.loadTextFromXml(finfo, doc_text, self.data)
                    else:
                        self.dataset_name = 'unknown'
                        self.data = self.data.append(
                            pd.DataFrame(
                                [[None, base_name, doc_text, None, None]],
                                columns=[
                                    'BUNCH_ID', 'DOC_NAME', 'TEXT', 'DATE',
                                    'REF_DATE'
                                ]))
                self.zfile.close()
                self.progressbar.value = self.progressbar.max
            self.next_button.disabled = False
            # self.data.set_index('DOC_NAME', inplace=True)
            if self.dataset_name == 'n2c2':
                self.inferRefDate(self.data)
            print("Totally " + str(len(sampled_ids)) +
                  " files have been imported into dataframe.\n"
                  "They are parsed into " + str(len(self.data)) +
                  " records in dataframe.")
            pass

        if self.previous_step.data is None:
            self.previous_step.start()
        parent_dir, files = self.previous_step.data
        if not parent_dir.lower().endswith('.zip'):
            label = widgets.HTML(
                "<h4>Read {} files from: </h4><p>{}</p>".format(
                    len(files), parent_dir))
            self.start_import_btn.on_click(startImport)
            self.sample_num.value = str(len(files))
            self.progressbar.max = len(files)
            rows = [
                label, self.sample_num, self.start_import_btn, self.progressbar
            ] + self.addSeparator(top='10px') + [
                self.addPreviousNext(self.show_previous, self.show_next)
            ]
        else:
            import zipfile, os
            parent_dir = os.path.join(self.previous_step.path, parent_dir)
            self.zfile = zipfile.ZipFile(parent_dir)
            print('reading file list from {} ...'.format(parent_dir))
            self.file_list = [
                f for f in self.zfile.infolist() if not f.is_dir()
            ]
            label = widgets.HTML(
                "<h4>Read {} files from: </h4><p>{}</p>".format(
                    len(self.file_list), parent_dir))
            self.sample_num.value = str(len(self.file_list))
            self.progressbar.max = len(self.file_list)
            self.start_import_btn.on_click(startImport)
            rows = [
                label, self.sample_num, self.start_import_btn, self.progressbar
            ] + self.addSeparator(top='10px') + [
                self.addPreviousNext(self.show_previous, self.show_next)
            ]
        vbox = widgets.VBox(rows)
        vbox.layout.flex_grown = 'column'
        clear_output()
        display(vbox)
        return self.data
Beispiel #56
0
    def update(self, _=None):
        """Map the widget values to cufflinks parameters and create a plot"""
        clear_output(wait=True)  # Is there any way to avoid the clipping?
        self.update_layout()
        if self.bargap.value == 1.0:
            bgap = None
        else:
            bgap = self.bargap.value
        if self.bargroupgap.value == 1.0:
            bgroupgap = None
        else:
            bgroupgap = self.bargroupgap.value
        if self.bins.value == 0:
            bins = None
        else:
            bins = self.bins.value
        if self.apply_layout.value:
            self.layout_d.visible = True
            layout = self.layout_d.output.copy()
            # barmode must be in the layout dict to take effect when the
            # layout param is used
            layout['barmode'] = self.barmode.value
            self._df[list(self.columns.value)].iplot(
                barmode=self.barmode.value,
                colors=self.cmap.map_data(range(len(self.columns.value)),
                                          hex=True),
                theme=self.theme.value,
                x=self.x.value,
                y=self.y.value,
                z=self.z.value,
                mode=self.marker_mode.value,
                title=self.title.value,
                xTitle=self.xtitle.value,
                yTitle=self.ytitle.value,
                zTitle=self.ztitle.value,
                colorscale=self.color_scale.value,
                text=self.text.value,
                size=self.size.value,
                layout_update=layout
            )  # weird bug where kind and colors sometimes don't apply properly
        else:
            self.layout_d.visible = False
            self._df[list(self.columns.value)].iplot(
                kind=self.kind.value,
                barmode=self.barmode.value,
                colors=self.cmap.map_data(range(len(self.columns.value)),
                                          hex=True),
                theme=self.theme.value,
                x=self.x.value,
                y=self.y.value,
                z=self.z.value,
                mode=self.marker_mode.value,
                title=self.title.value,
                xTitle=self.xtitle.value,
                yTitle=self.ytitle.value,
                zTitle=self.ztitle.value,
                colorscale=self.color_scale.value,
                text=self.text.value,
                size=self.size.value,
                fill=self.fill.value,
                subplots=self.subplots.value,
                shared_xaxes=self.shared_xaxes.value,
                shared_yaxes=self.shared_yaxes.value,
                symbol=self.symbol.value,
                bins=bins,
                bargap=bgap,
                bargroupgap=bgroupgap,
                orientation=self.ori.value,
                dash=self.dash.value,
                histnorm=self.histnorm.value,
                histfunc=self.histfunc.value,
                categories=self.categories.value,
                secondary_y=list(self.secondary_y.value))
Beispiel #57
0
def cisco_data(url):
    requests = 0
    start_time = time()
    for page in pages:
        #make a get request
        #cisco = get('https://www.trustradius.com/products/cisco-webex-meetings/reviews?f='+page, headers = headers)
        cisco = get(url + '?f=' + page, headers=headers)

        #pause the loop for 8-20 seconds
        sleep(randint(8, 20))

        #monitor the requests
        requests += 1
        elapsed_time = time() - start_time
        print('Request:{}; Frequency: {} requests/s'.format(
            requests, requests / elapsed_time))
        clear_output(wait=True)

        #show a warning if a non 200 status code is returned
        if cisco.status_code != 200:
            warn('Request: {}; Status code: {}'.format(requests,
                                                       cisco.status_code))

        cisco_soup = BeautifulSoup(cisco.text, 'html.parser')

        #find the major tag peculiar to each review
        container = cisco_soup.find_all("div", class_='serp-header')
        container2 = cisco_soup.find_all("div", class_='serp-body')
        #iterate through the major tag
        for con in container:

            #scrape the review title
            if not con.find('h3'):
                review_title = '-'
            else:
                review_title = con.find('h3').text
            review_titles.append(review_title)

            #scrape the review dates
            if not con.select('div.review-date'):
                review_date = '-'
            else:
                review_date = con.select('div.review-date')[0].text
            review_dates.append(review_date)

            #scrape the user scores
            if not con.select('div.trust-score__score span'):
                user_score = '-'
            else:
                user_score = con.select('div.trust-score__score span')[1].text
            user_scores.append(user_score)

            #scrape the user names.
            if not con.select('div.name'):
                name = '-'
            else:
                name = con.select('div.name')[0].text
            names.append(name)

            #scrape the user positions.
            if not con.select('div.position'):
                position = '-'
            else:
                position = con.select('div.position')[0].text
            positions.append(position)

            #scrape the company
            if not con.select('span.company'):
                company = '-'
            else:
                company = con.select('span.company')[0].text
            companys.append(company)

            #scrape the industry type.
            if not con.select('span.industry-type'):
                IndustryType = '-'
            else:
                IndustryType = con.select('span.industry-type')[0].text
            IndustryTypes.append(IndustryType)

            #scrape the employees count
            if not con.select('span.size'):
                Employee = '-'
            else:
                Employee = con.select('span.size')[0].text
            Employees.append(Employee)

        #scrape the review body
        for con in container2:
            if not con.select('div.question-response-container'):
                review_body = '-'
            else:
                review_body = con.select(
                    'div.question-response-container')[0].text
            review_bodys.append(review_body)

            #scrape the review pros
            if not con.select('ul.pros'):
                review_pro = '-'
            else:
                review_pro = con.select('ul.pros')[0].text
            review_pros.append(review_pro)

            #scrape the review cons
            if not con.select('ul.cons'):
                review_con = '-'
            else:
                review_con = con.select('ul.cons')[0].text
            review_cons.append(review_con)
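cisco_data fills module-level lists in parallel; once scraping finishes, the header-derived lists (one append per review card, so equal lengths) can be combined into a single table. A usage sketch, assuming the list names defined alongside the function above:

import pandas as pd

reviews = pd.DataFrame({
    'title': review_titles, 'date': review_dates, 'score': user_scores,
    'name': names, 'position': positions, 'company': companys,
    'industry': IndustryTypes, 'size': Employees,
})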
    def anime_scraper(self, tot_page_num, pause_sec, is_clear_output=True):
        '''
        Method to scrape a list of animes

        Arguments:
        tot_page_num: total number of pages to scrape
        pause_sec: number of seconds to pause before going to the next page
        is_clear_output: whether to clear output
        '''

        # address of the page to start scraping
        start_site_string = 'https://myanimelist.net/topanime.php'

        # initialize some variables
        start_time = time()
        num_request = 0
        page_num = 1
        anime_num = 0

        # initialize the dataframe with column names
        anime_dataset_columns = ['anime', 'score', 'popularity',
                                 'season_start', 'year_start', 'season_end', 'year_end',
                                 'num_episodes', 'anime_type']
        self.df_ = pd.DataFrame(columns=anime_dataset_columns)

        # request the initial page
        source = requests.get(start_site_string).text
        soup = BeautifulSoup(source, 'lxml')

        # update number of requests
        num_request += 1

        # pause for the number of second specified so as not to overburden the website
        sleep(pause_sec)

        # loop until some specified condition
        while True:

            # loop through each anime on the page
            for anime in soup.find_all('tr', class_='ranking-list'):

                # get information about the anime
                info_list = anime.find('div', class_='information di-ib mt4').text.strip().split()

                # get the information only if the information is stored in a nice format
                if len(info_list) == 10:

                    # get the title of the anime
                    anime_code = anime.find('a', class_='hoverinfo_trigger fl-l fs14 fw-b')
                    anime_name = anime_code.text if anime_code is not None else np.nan

                    # get the anime score
                    score_code = anime.find('span', class_='text on')
                    score = score_code.text if score_code is not None else np.nan

                    # get the type of the anime
                    anime_type = info_list[0]

                    # get the number of episodes and replace the missing values with -500
                    num_episodes = int(info_list[1].split('(')[-1].replace('?', '-500'))

                    # get the months when the animes started and ended
                    season_start, season_end = info_list[3], info_list[6]

                    # get the years when the animes started and ended
                    year_start, year_end = int(info_list[4]), int(info_list[7])

                    # get the popularity of the animes
                    popularity = int(info_list[-2].replace(',', ''))

                    # store the above information into a list and append it to the dataframe
                    all_info_list = [anime_name, score, popularity,
                                     season_start, year_start, season_end, year_end,
                                     num_episodes, anime_type]
                    info_dict = {column_name: info for column_name, info in zip(anime_dataset_columns, all_info_list)}
                    self.df_ = self.df_.append(info_dict, ignore_index=True)

                else:
                    continue

            # get the time elapsed and print the number of requests per second
            time_elapsed = time() - start_time
            print('processed {0:.3f} requests/s'.format(num_request/time_elapsed))

            # print the number of pages processed
            print('{}/{} pages processed'.format(page_num, tot_page_num))

            # break the while loop if the specified number of pages has reached
            if page_num == tot_page_num:
                print('terminated because total page specified reached')
                break

            # request the next page and break if it failed, meaning the scraper has reached the maximum possible
            # number of pages
            try: 
                next_code = soup.find('a', class_='link-blue-box next')
                source = requests.get(start_site_string + next_code['href']).text
                soup = BeautifulSoup(source, 'lxml')

            except TypeError: 
                print('terminated because no more pages can be accessed from the website')
                break

            # update the number of requests and pause for number of seconds specified
            num_request += 1
            sleep(pause_sec)

            # clear output
            if is_clear_output:
                clear_output(wait=True)

            # update the page number
            page_num += 1
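A hypothetical usage of the method above (the owning class is not shown in this excerpt, so the name AnimeScraper is assumed):

scraper = AnimeScraper()
scraper.anime_scraper(tot_page_num=5, pause_sec=2)
print(scraper.df_.head())  # the scraped animes land in scraper.df_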
Beispiel #59
0
def on_button_confirm(_):
    global attach
    clear_output()
    print('Attachment: ' + str(attach))
    open_attachment(attach)
    variable_parameters={name: []
                         for name in variables['names']},
    model_reporters=model_reporters)

count = 0
for i in range(replicates):
    for vals in param_values:
        # Change parameters that should be integers
        vals = list(vals)
        vals[0] = int(vals[0])

        # Transform to dict with parameter names and their values
        variable_parameters = {}
        for name, val in zip(variables['names'], vals):
            variable_parameters[name] = val

        variable_parameters['floorplan'] = floor_plans[0]

        batch.run_iteration(variable_parameters, tuple(vals), count)
        count += 1

        clear_output()
        print(f'{count / (len(param_values) * replicates) * 100:.2f}% done')

data = batch.get_model_vars_dataframe()
data.to_csv('data_results.csv')
print(data)

plt.scatter(data['human_count'], data['Total_steps'])
plt.savefig('human_count_total_steps.png')
plt.show()
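param_values and variables come from an elided setup step; a full-factorial grid would be one way to produce them (an illustration only, the original may use a sampling design instead):

from itertools import product

variables = {'names': ['human_count', 'fire_speed']}
param_values = list(product([50, 100, 150], [0.1, 0.2]))
replicates = 3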