def sample_reduction(positive_samples, negative_samples, ratio: float):
    """
    Reduce the number of training samples in the dataset
    to the match the given ratio
    Parameter:
        positive_samples (DataFrame):    The positive training samples
        negative_samples (DataFrame):   The negative training samples
        ratio  (float):                 Ratio of the positive and
        negative samples for the training, e.g. 0.1 -> 10/% positive samples
    """
    # compute the sampling fraction for each class, then cap the fractions
    # below so that both stay in the range 0 - 1
    total_count = len(positive_samples.index) + len(negative_samples.index)
    fraction_positive = total_count * ratio / len(positive_samples.index)
    fraction_negative = total_count * (1 - ratio) / len(negative_samples.index)
    # positive limits
    if fraction_positive > 1.0:
        fraction_negative = len(positive_samples.index) * \
            ((1 - ratio) / ratio) / len(negative_samples.index)
        fraction_positive = 1.0
    # negative limits
    elif fraction_negative > 1.0:
        fraction_positive = len(negative_samples.index) * \
            (ratio / (1 - ratio)) / len(positive_samples.index)
        fraction_negative = 1.0

    positive_samples = positive_samples.sample(frac=fraction_positive)
    negative_samples = negative_samples.sample(frac=fraction_negative)
    log(
        f"Reduced the number of samples to "
        f"{len(positive_samples.index)}/{len(negative_samples.index)} "
        f"({ratio}/{1 - ratio})", False)
    return positive_samples, negative_samples
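A standalone sketch of the capping arithmetic above, using toy pandas data (the numbers are made up; the log() helper is not needed here):

import pandas as pd

positive = pd.DataFrame({"x": range(20)})
negative = pd.DataFrame({"x": range(80)})

ratio = 0.5  # request 50% positives, more than the 20/100 the data provides
total = len(positive) + len(negative)
fraction_positive = total * ratio / len(positive)        # 2.5 -> must be capped
fraction_negative = total * (1 - ratio) / len(negative)  # 0.625

if fraction_positive > 1.0:
    # keep all positives and shrink the negatives to match the ratio
    fraction_negative = len(positive) * ((1 - ratio) / ratio) / len(negative)
    fraction_positive = 1.0

print(positive.sample(frac=fraction_positive).shape[0],
      negative.sample(frac=fraction_negative).shape[0])  # -> 20 20, a 50/50 split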
Example #2
    def _set_up_architecture_middle_inception(self):
        """Set up all variable architecture elements between the static input and output by using a the inception net.

        :return: The fully-connected bottleneck layer (not yet merged with any old bottleneck values).
        """
        log.log("hidden architecture: inception")
        return inception_builder.build(self._X)
Example #3
    def __init__(self,
                 images: np.ndarray,
                 labels: np.ndarray,
                 bottlenecks=None):
        """Create a new DataBundleAdvanced.

        :param images: See parent class.
        :param labels: See parent class.
        :param bottlenecks: See parent class.
        """
        # inform the user about potential memory improvements
        # (these refer to the casting done right after this check)
        if images.dtype != cf.get("img_dtype"):
            log.log(
                "WARNING: copying image array, because it has the wrong dtype: {}"
                .format(images.dtype))
        if labels.dtype != cf.get("label_dtype"):
            log.log(
                "WARNING: copying label array, because it has the wrong dtype: {}"
                .format(labels.dtype))

        # if the given parameters do not have the correct data type yet, we will convert them now
        images = np.asarray(images, dtype=cf.get("img_dtype"))
        labels = np.asarray(labels, dtype=cf.get("label_dtype"))
        # TODO why aren't we doing this for the bottlenecks? may they ever have varied?

        # now redirect the (maybe modified) parameters to the actual constructor of the parent class
        super(DataBundleAdvanced, self).__init__(images, labels, bottlenecks)

        # calculate and store the total number of foreground samples
        # (assuming that there are only two classes and the foreground class is described by "1")
        self._n_positive_samples = self.labels.sum()
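The dtype warnings above work because np.asarray only copies when the dtype actually differs; a standalone check (the dtypes here are arbitrary):

import numpy as np

a = np.zeros((2, 2), dtype=np.float32)
same = np.asarray(a, dtype=np.float32)  # dtype already matches: no copy
cast = np.asarray(a, dtype=np.uint8)    # dtype differs: a new array is created
print(same is a, cast is a)             # -> True False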
Example #4
 def log_results(results, first_line="results:"):
     """Log all values given in results."""
     log.log(first_line)
     for key, value in results.items():
         # format and log the value
         value_format = criteria.get(key).format(value)
         log.log("    - {}: {}".format(key, value_format))
Example #5
def scrape(all=False, **kwargs):
    if all:
        sources = ['http://www.europarl.europa.eu/meps/en/directory/xml?letter=&leg=']
    else:
        sources = ['http://www.europarl.europa.eu/meps/en/incoming-outgoing/incoming/xml',
                   'http://www.europarl.europa.eu/meps/en/incoming-outgoing/outgoing/xml',
                   'http://www.europarl.europa.eu/meps/en/full-list/xml']
    payload={}
    if 'onfinished' in kwargs:
        payload['onfinished']=kwargs['onfinished']
    if all:
        actives = {e['UserID'] for e in db.meps_by_activity(True)}
        inactives = {e['UserID'] for e in db.meps_by_activity(False)}
        meps = actives | inactives
        for unlisted in [ 1018, 26833, 1040, 1002, 2046, 23286, 28384, 1866, 28386,
                          1275, 2187, 34004, 28309, 1490, 28169, 28289, 28841, 1566,
                          2174, 4281, 28147, 28302, ]:
            meps.discard(unlisted)
            payload['id']=unlisted
            add_job('mep', dict(payload))
    for src in sources:
        root = fetch(src, prune_xml=True)
        for id in root.xpath("//mep/id/text()"):
            if all: meps.discard(int(id))
            payload['id']=int(id)
            add_job('mep', dict(payload))
    if all:
        log(3,"mepids not in unlisted nor in directory {!r}".format(meps))
        for id in meps:
            payload['id']=id
            add_job('mep', dict(payload))
Example #6
    def handle_read(self):
        data = self.recv(8192)
        #print(data)
        if not data:
            return
        try:
            data = loads(data)
        except ValueError:
            self.notify('Invalid json\n')
            return
        if 'command' not in data:
            self.notify('Missing "command" attribute', type='error')
            return
        if data['command'] in ['l', 'ls', 'list']:
            self.notify('scraper queue list', **get_all_jobs())

        if data['command'] in ['c', 'call']:
            if data.get('scraper') not in self.scrapers:
                self.notify('Missing or invalid scraper ' +
                            str(data.get('scraper')))
                return
            payload = data.get('payload', {})
            add_job(data['scraper'], payload)

        if data['command'] in ['log', 'setlog', 'setlogfile']:
            set_logfile(data.get('path'))
            log(3, 'Changing logfile to {0}'.format(data.get('path')))

        log(3, '# Command `{0}` processed'.format(data['command']))
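A hypothetical client for the line protocol handled above; the address and port are assumptions, since the snippet does not show where the server listens:

import json
import socket

with socket.create_connection(("127.0.0.1", 5555)) as s:
    # ask the queue server for its job list ('l', 'ls' and 'list' are aliases)
    s.sendall(json.dumps({"command": "list"}).encode())
    print(s.recv(8192).decode())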
Example #7
def get_engine(url):
    ATTEMPTS = 5
    DELAY = 3

    def fail(message):
        log(message, color=log_colors.FAIL)
        sys.exit(1)

    if url is None:
        fail('No database url. Check your environment.')

    try:
        engine = create_engine(url)
    except Exception:
        fail('Invalid database url. Check your environment.')
    else:
        attempt = 0
        while True:
            try:
                with engine.connect():
                    pass
            except Exception:
                if attempt < ATTEMPTS:
                    log(f'Could not connect to DB, retrying in {DELAY}')
                    time.sleep(DELAY)
                    attempt += 1
                    continue

                fail('Cannot connect to database.')
            else:
                return engine
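A usage sketch, assuming get_engine() from above is importable and the url lives in DATABASE_URL, as the error messages suggest:

import os
from sqlalchemy import text

engine = get_engine(os.environ.get("DATABASE_URL"))
with engine.connect() as conn:
    print(conn.execute(text("SELECT 1")).scalar())  # simple connectivity check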
Example #8
    def __init__(self):
        log('Gst init..')
        Gst.init()
        self.player = Gst.ElementFactory.make('playbin', 'player')
        fakesink = Gst.ElementFactory.make('fakesink', 'fakesink')
        self.player.set_property('video_sink', fakesink)

        bus = self.player.get_bus()
        bus.add_signal_watch()
        bus.connect('message::error', self.on_error)
        bus.connect('message::eos', self.on_eos)
        bus.connect('message::state-changed', self.on_state_change)

        self.stateListeners = []
        self.queueListeners = []

        self.PLAYING = False
        self.LOADED = False     #can resume?
        self.state = None
        self.songqueue = []
        self.shuffleBackupQueue = []
        self.cursor = -1

        self.MODE_REPEAT = MusicService.MODE_REPEAT_OFF
        self.MODE_SHUFFLE = MusicService.MODE_SHUFFLE_OFF
Example #9
def load_scrapers():
    scrapers = {}
    for scraper in os.listdir('scrapers/'):
        if scraper.startswith('_') or not scraper.endswith('.py'):
            continue
        try:
            name = scraper[:-3]
            import_path = 'scrapers.' + name
            if import_path in sys.modules:
                del sys.modules[import_path]
            s = load_source(import_path, 'scrapers/' + scraper)
        except:
            log(1, "failed to load scraper" % scraper)
            traceback.print_exc()
            continue
        s._queue = Queue()
        scrapers[name] = s
        s._name = name
        if hasattr(s, 'CONFIG'):
            cfg = CONFIG.copy()
            cfg.update(s.CONFIG)
            s.CONFIG = cfg
        else:
            s.CONFIG = CONFIG.copy()
        s.add_job = add_job
        s.get_all_jobs = get_all_jobs
        s._lock = RLock()
        s._job_count = 0
        if s.CONFIG['abort_on_error']:
            s._error_queue = [False for _ in range(ERROR_WINDOW)]
        Thread(target=run_scraper, args=(s, ), name=s._name).start()
        log(3, 'scraper %s added' % scraper)
    return scrapers
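The loader above picks up plain modules from scrapers/; a hypothetical minimal module it could load (the scrape() entry point is an assumption, run_scraper is not shown in the snippet):

# scrapers/example.py
CONFIG = {'abort_on_error': False}  # merged over the loader's global CONFIG

def scrape(**payload):
    print('scraping with', payload)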
Example #10
def load(path):
    log.log("load ", path)
    with open(path, 'r', encoding='utf-8') as f:
        s = f.read()
        log.log('load: s', s)
        if s:
            return json.loads(s)
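A hypothetical counterpart for writing the file back, mirroring the encoding that load() above reads:

import json

def save(path, data):
    # serialize `data` as UTF-8 JSON so load() can read it back
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)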
Example #11
 def loadandplay(self, song):
     self.stop()
     log('load and play...')
     self.player.set_property('uri', 'file://' + song[LOCATION])
     self.player.set_state(Gst.State.PLAYING)
     self.LOADED = True
     self.PLAYING = True
Example #12
 def check_host(self):
     if self._target.lower().startswith("http://"):
         self.https = False
         self._target = self._target.split("//")[-1].split("/")[0]
         log("Detected HTTP url, using HTTP ATK", "success")
         return True
     elif self._target.lower().startswith("https://"):
         self.https = True
         self._target = self._target.split("//")[-1].split("/")[0]
         log("Detected HTTPS url, using HTTPS ATK", "success")
         return True
     else:
         if socket.socket(socket.AF_INET, socket.SOCK_STREAM).connect_ex(
             (self._target, 80)) == 0:
             log(
                 "Target {} as port 80 open, using HTTP ATK".format(
                     self._target), "success")
             self.https = False
             return True
         else:
             if socket.socket(socket.AF_INET,
                              socket.SOCK_STREAM).connect_ex(
                                  (self._target, 443)) == 0:
                 log(
                     "Target {} as port 443 open, using HTTPS ATK".format(
                         self._target), "success")
                 self.https = True
                 return True
             else:
                 log("Target www ports are closed (80 & 443), abort.",
                     "error")
                 return False
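The probes above never close their sockets and wait with the default timeout; a tighter standalone variant of the same connect_ex check:

import socket

def port_open(host, port, timeout=3.0):
    # connect_ex returns 0 on success instead of raising
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.settimeout(timeout)
        return s.connect_ex((host, port)) == 0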
Example #13
    def loop(self):
        epoch = int(time.time())

        # iterate over a copy, because we remove from the list inside the loop
        for exc in list(self._pending):
            if epoch >= exc.epoch_start():
                # submit the bound method directly instead of a nested closure,
                # which would late-bind `exc` and could run the wrong object
                self._running += [(
                    exc,
                    self._executor.submit(exc.execute),
                )]

                self._pending.remove(exc)

                log("Execution {} started".format(exc.id()), "success")

        for r in list(self._running):  # copy: entries may be removed below
            if epoch >= r[0].epoch_stop():
                r[0].kill()
                time.sleep(1)

                if r[1].done():
                    log("Execution {} finished".format(r[0].id()), "success")
                    self._running.remove(r)
Example #14
    def create_input_placeholder(self):
        """Extend the default image input by optional augmentation operations.

        self._X will be set in the parent class.
        :return:
        """
        if cf.get("data_augmentation_online"):
            log.log("Extending the input placeholders with augmentation operations")
            # this can replace the default input node (self._X) with additional augmentation operations done in TensorFlow
            self._X_augmentation_input = tf.placeholder(name=cf.get("graph_input_training_layer_name") + "_augmented",
                                                        shape=self._shape_data_batch,
                                                        dtype=tf.float32)
            self._Y_augmentation_input = tf.placeholder(name='Y_augmented', shape=self._shape_labels_batch,
                                                        dtype=tf.int32)
            self._X_augmented, self._Y_augmented = data_augmentation_online.add_augmentation_operations(
                self._X_augmentation_input, self._Y_augmentation_input)

            # network input
            # (dtype=tf.uint8 is not allowed)
            _X = tf.placeholder_with_default(name=cf.get("graph_input_training_layer_name"),
                                             shape=self._shape_data_batch,
                                             input=self._X_augmented)

            _Y = tf.placeholder_with_default(name="Y",
                                             shape=self._shape_labels_batch,
                                             input=self._Y_augmented)
            return _X, _Y
        else:
            return super().create_input_placeholder()
Example #15
    def _update_best_val_results(self, res_val, step):
        """Check whether res_val contains better results than the best seen so far and remember the answer.

        :param res_val: Validation results as returned by self._full_evaluation(self._ds.valid)
        :param step: The iteration number in which res_val were obtained.
        :return:
        """
        if self.best_val_results is None \
                or res_val[self._main_criteria] > self.best_val_results[self._main_criteria]:
            # snapshots
            log.log("Saving snapshot..")
            snapshot_path_prefix = os.path.join(self._snapshot_dir_session,
                                                "val_{}_{:.3f}".format(
                                                    self._main_criteria,
                                                    res_val[self._main_criteria]
                                                ))
            self.best_snapshot_path = self._saver.save(self._session, snapshot_path_prefix, global_step=step)
            self.best_val_results = res_val
            self.iterations_since_best_found = 0

            # log the new high score
            best_val_txt = criteria.get(self._main_criteria).format(self.best_val_results[self._main_criteria])
            log.log("Updated best model with validation {} of {}".format(
                self._main_criteria,
                best_val_txt)
            )
Example #16
def parseAndSave(content,currentWebsite):
    # save_html_content(currentWebsite.id, websiteContents)

    soup = BeautifulSoup(content, 'lxml')

    items = soup.find_all('a')

    print("A Items", len(items))

    COUNT = 0

    if items:
        for a in items:
            if a.string:
                url, text = a.get('href'), a.string.encode('utf-8').strip()

                check_pass = check_content(url, text)

                if check_pass:
                    url = complement_url(url, currentWebsite.url)
                    if url:
                        result = save_info_feed(url, text, currentWebsite.id, currentWebsite.company.id)
                        if result:
                            COUNT += 1

    if COUNT == 0:
        log(NOTICE, "#{id} {name} {site} scraped no updates ({count} items)".format(
            id=currentWebsite.company.id,
            name=currentWebsite.company.name_cn.encode('utf-8').strip(),
            site=currentWebsite.url,
            count=COUNT))
    else:
        log(RECORD, "#{id} {name} {site} scraped {count} updated items".format(
            id=currentWebsite.company.id,
            name=currentWebsite.company.name_cn.encode('utf-8').strip(),
            site=currentWebsite.url,
            count=COUNT))
Example #17
def journal(activity, detail):
    what = {'t': time.time(),
            'a': str(activity),
            'd': detail,
    }
    journaldb.insert(what)
    log("Journal updated: ", activity)
Example #18
    def __on_run_done(self, work_result):
        """
        Handles user feedback on success or failure of select function.
        """
        self.run_button.setEnabled(True)
        self.parent.remove_worker_by_jid(work_result.job_id)
        result = work_result.result
        if not result:
            err_log('Error running select..')
            return
        try:
            if 'all_tables' in result:
                all_tables = result['all_tables']
                self.parent.populate_table_lists(all_tables)
            else:
                text = 'Error running select: \'' + str(result) + '\''
                err_log(text)

            # if self.table:
            #     self.table.close_table()
            #     self.table.close()
            #     QWidget().setLayout(self.table_container.layout())
            #
            # self.table = HapiTableView(self, new_table_name)
            # layout = QtWidgets.QGridLayout(self.table_container)
            # layout.addWidget(self.table)
            # self.table_container.setLayout(layout)

            log('Select successfully ran.')
        except Exception as e:
            err_log('Error running select.')
            debug(e)
Example #19
    def get_instance_from_address(self, filepath_abi, contract_address):
        """Returns a contract instance object from address
        Does a (crude) check that the deployment at that address is
        not empty. Creates a contract instance for use with all
        the 'Contract' methods specified in web3.py
        Returns:
            self.contract_instance(class ContractInterface): see above
        """

        self.contract_address = contract_address

        with open(filepath_abi, 'r') as fd:
            contract_abi = json.load(fd)

        try:
            contract_bytecode_length = len(
                self.web3.eth.getCode(self.contract_address).hex())
        except web3.exceptions.InvalidAddress as e:
            log("Contract address if invalid: {}".format(e),
                "error",
                errcode=-127)

        try:
            assert (contract_bytecode_length >
                    4), f"Contract not deployed at {self.contract_address}."
        except AssertionError as e:
            print(e)
            raise
        #else:
        #print(f"Contract deployed at {self.contract_address}. This function returns an instance object.")

        self.contract_instance = self.web3.eth.contract(
            abi=contract_abi[0], address=self.contract_address)

        return self.contract_instance
Example #20
    def stop(self):
        """Stop the previously-started runtime evaluation."""
        self._end_time = time()
        self._elapsed_seconds = self._end_time - self._start_time

        log.log("TimeWatcher Stop {}: {}".format(
            self._name, self.seconds_to_str(self._elapsed_seconds)))
Example #21
    def visualize_train_valid(train_split, val_split, train_predictions,
                              val_predictions):
        """Static helper method to save an image of a CollageEvaluation containing training and validation data.

        :param train_split:
        :param val_split:
        :param train_predictions:
        :param val_predictions:
        :return:
        """
        # create the collage
        collage = CollageEvaluation(
            splits={
                SPLIT_KEY_TRAIN: train_split,
                SPLIT_KEY_VAL: val_split,
                # SPLIT_KEY_TEST: self._ds.test,  # usually we don't need this here
            },
            predictions={
                SPLIT_KEY_TRAIN: train_predictions,
                SPLIT_KEY_VAL: val_predictions,
                # SPLIT_KEY_TEST: self.predict(self._ds.test),  # usually we don't need this here
            })

        # save the image file
        collage_key = "split_evaluation"
        collage_file_path = collage.save_img_file(collage_key)
        log.log("Saved image collage to visualize evaluation to {}".format(
            collage_file_path))
Example #22
    def start(self):
        """Starts counting the time."""
        self._start_time = time()
        self._end_time = None
        self._elapsed_seconds = None

        log.log("TimeWatcher Start: {}".format(self._name))
    def _ignore_file(self, dataset_key: str, ignore_key: str):
        """Add the given file to the ignore list.
        
        Note, this will not yet remove any already loaded files from e.g. self._image_infos.
        """
        # if the ignore list hasn't been initialized yet, load existing info from files first
        if self._ignore_dicts is None:
            self._read_ignore_dicts()

        # make sure the dataset has an entry before looking up the key
        if dataset_key not in self._ignore_dicts:
            self._ignore_dicts[dataset_key] = dict()

        if ignore_key not in self._ignore_dicts[dataset_key]:
            log.log("Ignoring {} of dataset {}.".format(
                ignore_key,
                dataset_key
            ))

            # add file to the internal ignore list
            self._ignore_dicts[dataset_key][ignore_key] = True

            # persisting: add one line per image to the ignore list file
            ignore_file_path = self._get_ignore_file_path(dataset_key)
            with open(ignore_file_path, 'a') as file:
                file.write(ignore_key + "\n")

                # TODO check whether this file is used in the currently loaded image_infos
        else:
            log.log("Already ignored: {} of dataset {}.".format(
                ignore_key,
                dataset_key
            ))
Example #24
    def _unignore_file(self, dataset_key: str, ignore_key: str):
        """Remove the given file from the ignore list.
        
        Note, this will not yet add any already added files from e.g. self._image_infos.
        """
        # if the ignore list hasn't been initialized yet, load existing info from files first
        if self._ignore_dicts is None:
            self._read_ignore_dicts()

        log.log("Unignoring {} of dataset {}.".format(
            ignore_key,
            dataset_key
        ))

        # remove file from the internal ignore list
        if dataset_key in self._ignore_dicts and ignore_key in self._ignore_dicts[dataset_key]:
            del self._ignore_dicts[dataset_key][ignore_key]

        # persisting: remove referencing file lines
        # each line is one ignored element
        ignore_file_path = self._get_ignore_file_path(dataset_key)
        file_str = ""
        lines = file_handler.read_txt_lines(ignore_file_path)
        for line in lines:
            if line != ignore_key:
                file_str += line + "\n"
        with open(ignore_file_path, "w") as file:
            file.write(file_str)
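A standalone sketch of the persistence scheme shared by the two methods above: one ignored key per line in a plain text file (the path and key are made up):

ignore_path = "/tmp/dataset_train.ignore"

# _ignore_file appends one line per newly ignored element
with open(ignore_path, "a") as f:
    f.write("img_0042.png\n")

# _unignore_file rewrites the file without the removed key
with open(ignore_path) as f:
    keep = [ln for ln in f.read().splitlines() if ln != "img_0042.png"]
with open(ignore_path, "w") as f:
    f.writelines(ln + "\n" for ln in keep)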
Example #25
def votemeta(line, date):
    log(3, 'vote title is "%s"' % line)
    res={'rapporteur': []}
    m=docre.search(line)
    if m:
        doc=m.group(1).replace(' ', '')
        log(4,'setting doc to "%s"' % doc)
        res['doc']=doc
        reports=db.get("dossiers_by_doc", doc)
        if reports:
            res['epref']=[report['procedure']['reference'] for report in reports]
            if len(reports) > 1:
                log(3,"more than 1 dossier referencing document %s, %s" % (doc,[d['procedure']['reference'] for d in reports]))
        else:
            if doc in VOTE_DOX_RE:
                res['epref']=[VOTE_DOX_RE[doc]]
            elif doc not in ignoredox:
                log(2,'%s despite matching regex could not associate dossier with vote in "%s"' % (doc,line))
        return res

    m=refre.search(line)
    if m and db.get('ep_dossiers',m.group(1)):
        res['epref']=[m.group(1)]
        return res
    for k,v in VOTE_DOX.items():
        if k in line:
            res['epref']=[v]
            return res
    log(4,'no associated dossier for: "%s"' % line)
    return res
Example #26
 def handle_noargs(self, **options):
     try:
         log('Debug', 'Running FUNF decryption script')
         decrypt()
     except Exception as e:
         log('Error',
             'Exception thrown from FUNF decryption script: ' + str(e))
Example #27
def _build_production_model(model_def, best_params, x, y):
    log("Production model build started at %s\n" % now())

    super_model = model_def.model(best_params)
    super_model.fit(x, y)

    return super_model
Example #28
    def _run_all_models(self, refactoring, refactoring_name, dataset, features, scaler, x, y, x_train, x_tests, y_train, y_tests, test_names):
        """
        For each model, it:
        1) Performs the hyper parameter search
        2) Performs k-fold cross-validation
        3) Persists evaluation results and the best model
        """
        for model in self._models_to_run:
            model_name = model.name()
            if TEST:
                model_name += " test"
            try:
                log("\nBuilding Model {}".format(model.name()))
                self._start_time()
                test_scores, model_to_save = self._run_single_model(model, x, y, x_train, x_tests, y_train, y_tests)

                # log test scores
                log(format_results_single_run(dataset, refactoring_name, test_names, model_name,
                                              test_scores["precision"], test_scores["recall"],
                                              test_scores['accuracy'], test_scores['tn'],
                                              test_scores['fp'], test_scores['fn'], test_scores['tp'],
                                              model_to_save, features))

                # we save the best estimator we had during the search
                model.persist(dataset, refactoring_name, features, model_to_save, scaler)
                self._finish_time(dataset, model, refactoring)
            except Exception as e:
                log("An error occurred while working on refactoring " + refactoring_name + " model " + model.name()
                       + " with datasets: " + str(test_names))
                log(str(e))
                log(str(traceback.format_exc()))
Example #29
 def mark_question_as_resolved(self, question_id):
     try:
         self.cur.execute(
             "UPDATE question SET has_answer=TRUE WHERE question_id=%s",
             (question_id, ))
     except:
         log("question reolution db update failed")
Example #30
    def _extract_windows(self, img: ImageInfo, convert_raw_to_np=True):
        """Extract all sliding windows from the given img.

        Essentially, this is a wrapper for Window.extract_windows(img) to allow additional steps required by subclasses.
        Exceptions will be caught and replaced by an empty list along with an error message, because we don't want the
        complete inference process to get stopped because of single images.
        """
        try:
            windows_raw, windows_info = Window.extract_windows(img, convert_raw_to_np)

            if len(windows_raw) < 1:
                raise ValueError("Could not extract any windows from the given image")

            return windows_raw, windows_info

        except FileNotFoundError:
            log.log(" .. Skipped {}, because the file could not be found".format(
                img.path_resized
            ))
            return [], []
        except Exception:
            log.log(" .. Skipped {}, because of an unexpected error:\n{}".format(
                img.path_resized,
                traceback.format_exc()
            ))
            return [], []
Example #31
def albumart(songtitle):
    try:
        return ImageQt.ImageQt(
            cropsquare(Image.open(artname(songtitle) + '.jpg')))
    except:
        log(traceback.format_exc())
        return ImageQt.ImageQt(cropsquare(Image.open('img/example3.jpg')))
Example #32
def load_file(filename):
	#pdb.set_trace()
	log.log('Debug', 'Trying to populate db with ' + filename);
	mConnector = ConnectorFunf.objects.all()[0]
	db = database.Database()
	anonymizerObject = Anonymizer()
	
	documents_to_insert = defaultdict(list)
	
	proc_dir = os.path.join(mConnector.decrypted_path, 'processing')
	if not os.path.exists(proc_dir):
		os.makedirs(proc_dir)
		
	decrypted_filepath = os.path.join(mConnector.decrypted_path, filename)
	processing_filepath = os.path.join(proc_dir,filename)
	current_filepath = decrypted_filepath
	if os.path.exists(decrypted_filepath) and not os.path.exists(processing_filepath):
		try:
			# move to processing
			shutil.move(decrypted_filepath, proc_dir)
			current_filepath = processing_filepath
			# open connection to db file
			conn = sqlite3.connect(processing_filepath)
			cursor = conn.cursor()
			
			# get the meta data from db file
			meta = {}
			(meta['device'], meta['uuid'], meta['device_id'], meta['sensible_token'], meta['device_bt_mac']) = \
				cursor.execute('select device, uuid, device_id, sensible_token, device_bt_mac from file_info').fetchone()
			meta['device_id'] = anonymizerObject.anonymizeValue('device_id',meta['device_id'])
			
			#pdb.set_trace()
			# get the user associated with the token
			#meta['user'] = authorization_manager.getAuthorizationForToken(\
			#	'connector_funf.submit_data', meta['token']).user
			meta['user'] = '******'
			for row in cursor.execute('select * from data'):
				doc = row_to_doc(row, meta['user'], anonymizerObject )
				if doc is None:
					continue
				documents_to_insert[doc['probe']].append(dict(doc.items() + meta.items()))
			
			cursor.close();
			#pdb.set_trace()
			for probe in documents_to_insert:
				db.insert(documents_to_insert[probe], probe)
				
			os.remove(current_filepath);
			
		except Exception as e:
			log.log('Error', str(e));
			if not 'already exists' in str(e):
				top = traceback.extract_stack()[-1]
				fail.fail(current_filepath, load_failed_path, 'Exception with file: ' + filename\
				+ '\n' + ', '.join([type(e).__name__, os.path.basename(top[0]), str(top[1])]))
			else:
				pass
			
			return False
Example #33
def write_xml_lmf(*args, **kwds):
    # An XML LMF file contains one lexical resource, itself containing lexicon(s)
    wrapper_rw(lmf_write, *args, **kwds)
    # Count total number of entries to report to user
    entries_nb = 0
    for lexicon in args[0].get_lexicons():
        entries_nb += lexicon.count_lexical_entries()
    log("Successfully wrote %s LMF entries into XML LMF file '%s'." % (entries_nb, args[1]))
Example #34
def write_tex(*args, **kwds):
    # A LaTeX file contains one or several lexicons and information about the lexical resource
    wrapper_rw(tex_write, *args, **kwds)
    # Count total number of entries to report to user
    entries_nb = 0
    for lexicon in args[0].get_lexicons():
        entries_nb += lexicon.count_lexical_entries()
    log("Successfully wrote %s LMF entries into LaTeX file '%s'." % (entries_nb, args[1]))
	def config(self, request):
		log.log('Debug', 'GET for config')
		access_token = request.REQUEST.get('access_token', '')
		authorization = self.pipe.getAuthorization(access_token)
		config = self.readConfig(authorization['user'])
		if config:
			return HttpResponse(config)
		else:
			return HttpResponse(status='500')
Example #36
def write_odt(*args, **kwds):
    # Import only when needed because it requires installation of Python package 'odf'
    from output.odt import odt_write
    # A document file contains one or several lexicons and information about the lexical resource
    wrapper_rw(odt_write, *args, **kwds)
    # Count total number of entries to report to user
    entries_nb = 0
    for lexicon in args[0].get_lexicons():
        entries_nb += lexicon.count_lexical_entries()
    log("Successfully wrote %s LMF entries into document file '%s'." % (entries_nb, args[1]))
Example #37
def delete_book(book_id):
    log("Deleting book with _id:", book_id)
    book = {"_id": ObjectId(book_id)}

    original = booksdb.find_one(book)

    result = booksdb.remove(book)

    journal('DELETE', {'in': None, 'out': original})
    return jsonify({'result': result})
Example #38
 def handle(self, *args, **options):
 	if len(args) < 1:
 		log('Error', 'You have to give me the filename')
 		return
 	#log('Debug', 'Will try to decrypt ' + args[0])
 	try:
 		decrypt_file_from_upload(args[0])
 	except Exception as e:
 		log.log('Error', 'Exception while single decrypting file ' + args[0] + ': ' + str(e))
Example #39
def config(request):
	#pdb.set_trace()
	log.log('Debug', 'GET for config')
	access_token = request.REQUEST.get('access_token', '')
	#authorization = self.pipe.getAuthorization(access_token)
	#config = self.readConfig(authorization['user'])
	config = readConfig('dummy')
	if config:
		return HttpResponse(config)
	else:
		return HttpResponse(status='500')
Example #40
def main():
    old_version=None
    cfg_manager=config.get_cfg_manager()
    LOG.log("try to read config file :./release.conf")
    cfg=cfg_manager.parse_config_file('./release.conf')
    LOG.log("config:%s" % cfg)
    source=source_manager.SourceManager(cfg['source'])
    build=build_manager.BuildManager(cfg['build'])
    collect=collect_manager.CollectManager(cfg['collect'])
    monitor=server.Server(cfg['monitor'])
    monitor.status()
    monitor.start(source,build,collect)
Example #41
 def add_all(cls, articles):
     if not articles:
         return
     session = DBSession()
     session.add_all(articles)
     try:
         session.flush()
         session.commit()
     except SQLAlchemyError:
         session.rollback()
         log.log(message=articles[0].url)
         # raise(SQLAlchemyError)
     finally:
         session.close()
Example #42
def read_xml_lmf(*args, **kwds):
    # To access options
    from pylmflib import options
    global options
    # An XML LMF file contains one lexical resource, itself containing lexicon(s)
    lexical_resource = wrapper_rw(lmf_read, *args, **kwds)
    # Count total number of entries to report to user
    entries_nb = 0
    for lexicon in lexical_resource.get_lexicons():
        entries_nb += lexicon.count_lexical_entries()
        if options.cross_references:
            # Verify lexicon coherence
            lexicon.check_cross_references()
    log("Successfully created %s LMF entries from XML LMF file '%s'." % (entries_nb, args[0]))
    return lexical_resource
Example #43
def upload(request):
	log.log('Debug', 'Received POST')
	scope = 'all_probes'

	access_token = request.REQUEST.get('access_token', '')


	if request.META['CONTENT_TYPE'].split(';')[0]=='multipart/form-data':
		try:
			uploaded_file = request.FILES['uploadedfile']
			if uploaded_file:
				#try:
					
					#authorization = authorization_manager.getAuthorizationForToken(scope, access_token)
					mConnector = ConnectorFunf.objects.all()[0];
					#if ('error' in authorization) or (authorization == None):
					#	upload_path = mConnector.upload_not_authorized_path;
					#else:
					#	upload_path = mConnector.upload_path
					upload_path = mConnector.upload_path	
					backup_path = mConnector.backup_path

					if not os.path.exists(upload_path):
						os.makedirs(upload_path)
					if not os.path.exists(backup_path):
						os.makedirs(backup_path)
					
					filename = uploaded_file.name.split('.')[0].split('_')[0]+'_'+access_token+'_'+str(int(time.time()))+'.db'
					filepath = os.path.join(upload_path, filename)
					while os.path.exists(filepath):
						parts = filename.split('.db');
						counted_parts = re.split('__',parts[0]);
						counter = -1;
						if len(counted_parts) > 1:
							counter = int(counted_parts[1]);
						filename = counted_parts[0] + '__' + str(counter + 1) + '.db'
						filepath = os.path.join(upload_path, filename)

					write_file(filepath, uploaded_file)
					shutil.copy(filepath, os.path.join(backup_path, filename))
					
					# run decryption in the background
					log.log('Debug', settings.ROOT_DIR + './manage.py ' + filename)
					p = Popen([settings.ROOT_DIR + './manage.py','funf_single_decrypt',filename], stdout=PIPE, stderr=PIPE)

				#except Exception as e:
				#	log.log('Error', 'Could not write: ' + str(e))
				#	return HttpResponse(status='500')
				#else:
					return HttpResponse(json.dumps({'ok':'success'}))
			else:
				log.log('Error', 'failed to read')
		except KeyError as e:
			log.log('Error', 'Key error: ' + str(e))
			pass
	# bad request
	return HttpResponse(status='500')
Example #44
 def request(self, url, headers=headers, cookies={}):
     delay = 60
     while True:
         try:
             r = requests.get(url, headers=headers, timeout=10)
         except:
             print "network error, sleeping", delay, "seconds"
             log.log(message=u"network error " + url)
             time.sleep(delay)
             delay += 60
             continue
         if self.is_valid(r):
             return self.convert_encode(r)
         elif r.status_code == 503:
             log.log(message=u"503 error " + url)
             time.sleep(60)
             continue
         return u""
Example #45
 def search(self):
     "Search torrents and update all hits corresponding to the current episode"
     self.update_airdates()
     airdate = self.get_airdate(self.hits.current)
     if not airdate:
         log.verbose("%s (%s) doesn't have an air date" % (self.name, self.hits.current))
         # Try to determine if there is still an episode which airs after
         episode, airdate = self.get_episodes_after(self.hits.current)
         if episode:
             self.hits.current = episode
     if airdate and airdate >= date.today():
         log.verbose("%s (%s) will air on %s" % (self.name, self.hits.current, airdate))
     else:
         torrents = self.search_by_episode(self.hits.current)
         log.log(0 if len(torrents) else 1, "%d torrent(s) found for '%s' (%s)" % (len(torrents), self.name, self.hits.current))
         if torrents:
             torrents = [t.merge(self.hits.torrent(self.hits.current, t)) for t in torrents]
             self.hits.torrents(self.hits.current, torrents)
     self.hits.save()
Example #46
def read_mdf(*args, **kwds):
    import wrapper
    # To access options
    from pylmflib import options
    global options
    # Find lexicon configuration if any
    id = kwds.get('id')
    if id is not None and wrapper.lexical_resource is not None:
        lexicon = wrapper.lexical_resource.get_lexicon(id)
        # Add lexicon argument
        kwds.update({'lexicon': lexicon})
    # An MDF file contains one lexicon only, but wrapper_rw() function encapsulates it into a lexical resource
    lexical_resource = wrapper_rw(mdf_read, *args, **kwds)
    for lexicon in lexical_resource.lexicon:
        if options.cross_references:
            # Verify lexicon coherence
            lexicon.check_cross_references()
        log("Successfully created %s LMF entries from MDF file '%s'." % (lexicon.count_lexical_entries(), lexicon.get_entrySource()))
    return lexical_resource
Example #47
def decrypt_file(directory_to_decrypt, f):
	#pdb.set_trace()
	proc_dir = os.path.join(directory_to_decrypt, 'processing')
	if not os.path.exists(proc_dir):
		os.makedirs(proc_dir)
	upload_filename = os.path.join(directory_to_decrypt, f)
	proc_filename = os.path.join(proc_dir, f)
	decrypted_filename = os.path.join(mConnector.decrypted_path, f)
	curr_filename = upload_filename #for keeping track of the file's current location
	decryption_success = False;
	try:
		# check if still exists, might have been moved in another thread
		if os.path.exists(upload_filename) and not os.path.exists(proc_filename):
			# move it to processing
			shutil.move(upload_filename, proc_dir)
			curr_filename = proc_filename
			# decrypt
			if decrypt_if_not_db_file(proc_filename, key, extension=None):
				decryption_success = True;
				fail.safe_move(proc_filename, mConnector.decrypted_path)
				log.log('Debug','Still here #1')
				curr_filename = decrypted_filename
				orig_filename = proc_filename + '.orig'
				if os.path.exists(orig_filename):
					os.remove(orig_filename)
				#log.log('Debug','Still here #2')	
				database_single_population.load_file(f)
			return True
		else:
			return False
	except Exception as e:
		#find out when it happened
		action = '';
		if curr_filename == upload_filename:
			action = 'moving to /processing'
		elif curr_filename == proc_filename and decryption_success == False:
			action = 'decrypting'
		elif curr_filename == proc_filename and decryption_success == True:
			action = 'moving to /decrypted'
		elif curr_filename == decrypted_filename:
			action = 'removing the .orig file of'
		try:
			# str has no .contains() method; use the `in` operator instead
			if 'already exists' not in str(e):
				fail.fail(curr_filename, mConnector.decryption_failed_path, 'Exception thrown: ' + str(e) + '. While ' + action + ' file: ' + f)
				log.log('error', 'README ^^^^^^^^^^^^^')
			else:
				log.log('error', 'Exception thrown: ' + str(e) + '. While ' + action + ' file: ' + f)
		
		except Exception as e1:
			pass
		
		return False;
Example #48
	'''
	def upload(self, request):
		log.log('Debug', 'Received POST')
		scope = 'all_probes'

		access_token = request.REQUEST.get('access_token', '')


		if request.META['CONTENT_TYPE'].split(';')[0]=='multipart/form-data':
	#	if not request.META['CONTENT_TYPE']=='multipart/form-data;boundary=*****':
			try:
				uploaded_file = request.FILES['uploadedfile']
				if uploaded_file:
					try:
						
						#authorization = self.pipe.getAuthorization(access_token, scope=scope)
						authorization = ''
				
						if 'error' in authorization:
							upload_path = service_config.CONNECTORS["connector_funf"]["config"]["upload_not_authorized_path"]
						else:
							upload_path = service_config.CONNECTORS["connector_funf"]["config"]["upload_path"]

						if not os.path.exists(upload_path):
							os.mkdir(upload_path)
						
						filepath = os.path.join(upload_path, uploaded_file.name.split('.')[0].split('_')[0]+'_'+access_token+'_'+str(int(time.time()))+'.db')
						
						self.write_file(filepath, uploaded_file)

					except Exception as e:
						log.log('Error', 'Could not write: ' + str(e))
						return HttpResponse(status='500')
					else:
						return HttpResponse(json.dumps({'ok':'success'}))
				else:
					log.log('Error', 'failed to read')
			except KeyError as e:
				log.log('Error', 'Key error: ' + str(e))
				pass
		# bad request
		return HttpResponse(status='500')
	'''
Example #49
 def test_log(self):
     import os
     ## Test with options
     self.options.log_filename = "test/log.txt"
     msg = "These are options."
     log(msg, self.options)
     # Test log file
     expected_line = "These are options." + EOL
     log_file = open_read(self.options.log_filename)
     self.assertEqual(log_file.readline(), expected_line)
     log_file.close()
     ## Test without options
     msg = "This is a message."
     log(msg)
     # Test log file
     expected_lines = ["These are options." + EOL,
                       "This is a message." + EOL]
     log_file = open_read(self.options.log_filename)
     self.assertListEqual(log_file.readlines(), expected_lines)
     ## Test verbose mode (need to reset log filename)
     self.options.verbose = True
     log_filename = self.options.log_filename
     self.options.log_filename = None
     log(msg, self.options)
     # Test that log file remains unchanged
     log_file = open_read(log_filename)
     self.assertListEqual(log_file.readlines(), expected_lines)
     ## Test unwritable file
     self.options.log_filename = "/usr/log.txt"
     test = False
     try:
         log(msg, self.options)
     except Error:
         test = True
     self.assertTrue(test)
     ## Remove log file
     os.remove(log_filename)
Example #50
def read_sort_order(*args, **kwds):
    sort_order = wrapper_rw(order_read, *args, **kwds)
    log("Successfully read sort order: " + str(sort_order))
    return sort_order
Example #51
def update_book(book_id):
    book = {"_id": ObjectId(book_id)}
    log("Updating book with _id:", book_id)

    original = booksdb.find_one(book)

    log("Incoming json:", request.json)
    details = request.json

    del details['_id']
    log("Updated record: ", details)

    try:
        valid_book = book_schema(details)
        pprint(valid_book)
        log('Book validated.')
    except Exception as ve:
        log(ve)
        return jsonify({'error': unicode(ve), 'status': 'Error'}), 415

    log("Updating with: ", valid_book)

    result = booksdb.update(book, valid_book)

    journal('UPDATE', {'in': valid_book, 'out': original})

    return jsonify({'result': result})
Example #52
def get_book_details(book_id):
    result = booksdb.find_one({"_id": ObjectId(book_id)})
    result["_id"] = str(result["_id"])
    pprint(result)
    log("Returning book search for", book_id)
    return jsonify({'details': result})
Example #53
def cleanFailedFilenames(failed_filenames):
	for filename in failed_filenames:
		log.log('Debug', 'File: ' + str(filename) + ' already exists.')
Example #54
def write_mdf(*args, **kwds):
    # As an MDF file can only contain one lexicon, create as many MDF files as lexicons in the lexical resource (TODO: rename files)
    for lexicon in args[0].get_lexicons():
        wrapper_rw(mdf_write, lexicon, *args[1:], **kwds)
        log("Successfully wrote %s LMF entries into MDF file '%s'." % (lexicon.count_lexical_entries(), args[1]))
Example #55
from bson.objectid import ObjectId
from pprint import pprint
import json
import time
import pymongo

from flask import Flask, jsonify, request
from voluptuous import Schema, Optional, Required, Match

from utils.log import log
import isbntools


app = Flask(__name__, static_url_path='')

log("Beginning.")

db_host = 'localhost'
db_port = 27017

dbclient = pymongo.MongoClient(db_host, db_port)
db = dbclient['c-lib']
booksdb = db['books']
journaldb = db['journal']

book_schema = Schema({
    Optional('_id'): Match(r'^(?=[a-f\d]{24}$)(\d+[a-f]|[a-f]+\d)'),
    Required('authors'): unicode,
    Required('comment'): unicode,
    Required('coordinates'): [int, int],
    Required('created'): float,
Example #56
def fail(filename, failed_directory_path, message):
    log.log("Error", message)
    safe_move(filename, failed_directory_path)
Example #57
def add_book_by_isbn():
    isbnservice = "wcat"

    request.get_data()

    try:
        jsonstuff = json.loads(request.data)
    except Exception as e:
        log("JSON Decoding fail: ", e, request.data)
        jsonstuff = None  # keep the name defined for the check below

    if not jsonstuff or not 'isbn' in jsonstuff or \
            isbntools.notisbn(jsonstuff['isbn']):
        log("Invalid ISBN: ", jsonstuff)
        return jsonify({'status': "Invalid ISBN"}), 400
    else:
        isbn = str(jsonstuff['isbn'])

    log("ISBN entered:", isbn)

    book = booksdb.find_one({"isbn": isbn})
    if book:
        log("Book known")
        pprint(book)
        book['_id'] = str(book['_id'])

        return jsonify({'book': book, 'status': 'Book existant'}), 201

    try:
        meta = isbntools.meta(isbn, service=isbnservice)
        log("META found:", meta)
    except Exception as e:
        log("META not found: ", e)
        meta = {'Publisher': "Unknown",
                'Language': "Unknown",
                'Title': "Unknown",
                'Authors': ["Unknown"],
                'Year': "Unknown"}
        # TODO: Allow/offer manual entry
        return jsonify({'status': "Metaserver lookup failed."}), 504

    try:
        book = {
            'isbn': isbn,
            'publisher': unicode(meta['Publisher']),
            'language': unicode(meta['Language']),
            'title': unicode(meta['Title']),
            'authors': str(meta['Authors']),
            'year': int(meta['Year']),
            'created': time.time(),
            'modified': time.time(),
            'coordinates': [0, 0],
            'status': u'None',
            'tags': [],
            'comment': u''
        }
    except TypeError as e:
        return jsonify({'error': unicode(e), 'status': 'Error'}), 415

    log(book)

    try:
        valid_book = book_schema(book)
        pprint(valid_book)
        log('Book validated.')
    except Exception as ve:
        log(ve)
        return jsonify({'error': unicode(ve), 'status': 'Error'}), 415

    booksdb.insert(valid_book)
    valid_book['_id'] = str(valid_book['_id'])

    journal('ADD', {'in': valid_book, 'out': None})

    return jsonify({'book': book, 'status': 'Book created'}), 201
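A hypothetical client call; the route and port are assumptions, since the snippet omits the Flask route decorator:

import requests

resp = requests.post("http://localhost:5000/books/isbn",
                     json={"isbn": "9780132350884"})  # any valid ISBN works
print(resp.status_code, resp.json())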
Example #58
def read_config(*args, **kwds):
    lexical_resource = wrapper_rw(config_read, *args, **kwds)
    log("Successfully read config")
    return lexical_resource
Example #59
	def handle_noargs(self, **options):
		try:
			log('Debug', 'Running database population script')
			populate()
		except Exception as e:
			log('Error', 'Exception thrown from database population script: ' + str(e))
Example #60
def run(db):
	print 'running'
	authorizationManager = AuthorizationManager()
	decrypted_path = settings.CONNECTORS['connector_funf']['config']['decrypted_path']
	load_failed_path = settings.CONNECTORS['connector_funf']['config']['load_failed_path']
	#TODO
	raw_filenames = [filename for filename in os.listdir(decrypted_path) if fnmatch.fnmatch(filename, '*.orig')]

	anonymizerObject = Anonymizer()

	raw_filenames = raw_filenames[:settings.CONNECTORS['connector_funf']['config']['max_population_files']]
	filenames = [os.path.join(decrypted_path, filename) for filename in raw_filenames]

	print raw_filenames
	proc_dir = os.path.join(decrypted_path, 'processing')
	failed_filenames = []

	for f in filenames:	
		try:
			shutil.move(f, proc_dir)
		except Exception as e:
			failed_filenames.append(os.path.basename(f))

	raw_filenames = [e for e in raw_filenames if e not in failed_filenames]
	filenames = [os.path.join(proc_dir, filename) for filename in raw_filenames]

	cleanFailedFilenames(failed_filenames)

	cursor = None
	documents_to_insert = defaultdict(list)
	filenames_to_remove = []
	nof_files = len(filenames)
	file_count = 0
	
	for filename in filenames:
		file_count += 1
		if not os.path.exists(filename):
			continue
		log.log('Debug', 'Populating to DB, file(%d/%d): %s' % (file_count, nof_files, filename))
		try:
			conn = sqlite3.connect(filename)
			cursor = conn.cursor()
		except Exception as e:
			fail.fail(filename, load_failed_path, 'Exception thrown:' + str(e) + '. While trying to open sqlite file: ' + filename)
			continue


		try:
			device = cursor.execute('select device from file_info').fetchone()[0]
			uuid = cursor.execute('select uuid from file_info').fetchone()[0]
			device_id = ''
			try:
				device_id = anonymizerObject.anonymizeValue('device_id',str(cursor.execute('select device_id from file_info').fetchone()[0]))
			#	device_id = str(cursor.execute('select device_id from file_info').fetchone()[0])
			except Exception as e:
				fail.fail(filename, load_failed_path, 'Exception thrown: ' + str(e) + '. While trying to extract device_id from file: ' + filename)
				continue

			#TODO: replace device_id with token
			try:
			#	user = anonymizerObject.anonymizeValue('user', authorizationManager.getAuthorizationForToken('connector_funf', 'all_probes', device_id)['user'])
				user = authorizationManager.getAuthorizationForToken('connector_funf', 'all_probes', device_id)['user']
			except KeyError: user = None
			if not user:
				log.log('Debug', 'User does not exist for device id: ' + str(device_id))
				fail.fail(filename, load_failed_path, 'No user found in database. Device id: ' + str(device_id))
				continue
	
			for row in cursor.execute('select * from data'):
				name = row[1]
				timestamp = row[2]
				#TODO: separate this sanitization
				data_raw = row[3].replace('android.bluetooth.device.extra.DEVICE','android_bluetooth_device_extra_DEVICE')
				data_raw = data_raw.replace('android.bluetooth.device.extra.NAME', 'android_bluetooth_device_extra_NAME')
				data_raw = data_raw.replace('android.bluetooth.device.extra.CLASS', 'android_bluetooth_device_extra_CLASS')
				data_raw = data_raw.replace('android.bluetooth.device.extra.RSSI', 'android_bluetooth_device_extra_RSSI')
				data = json.loads(data_raw)
				doc = {}
				doc['_id'] = hashlib.sha1(json.dumps(data)).hexdigest()+'_'+user+'_'+str(int(timestamp))
				doc['uuid'] = uuid
				doc['device'] = device
				doc['device_id'] = device_id
				doc['user'] = user
				doc['probe'] = data['PROBE'].replace('.','_')
				doc['data'] = anonymizerObject.anonymizeDocument(data, doc['probe'])
				doc['name'] = name
				doc['timestamp'] = float(timestamp)
				doc['timestamp_added'] = time.time()
				documents_to_insert[doc['probe']].append(doc)
	
		except Exception as e:
			fail.fail(filename, load_failed_path, 'Exception thrown: ' + str(e) + '. While extracting data from file: ' + filename)
#			traceback.print_exc(file=sys.stdout)
			continue
	
		cursor.close()
		log.log('Debug', 'Adding file to be populated')
		filenames_to_remove.append(filename)
	

	#TODO: make sure that the duplicates logic works
	for probe in documents_to_insert:
		try:
			db.insert(documents_to_insert[probe], probe)
		except Exception as e:
		#	print 'problem!!!' + probe + ' '
		#	traceback.print_exc(file=sys.stdout)
			pass


	for filename in filenames_to_remove:
		print "removing ",filename
		os.remove(filename)