Example #1
def start_btsync(request):
    """
        Run the BitTorrent Sync application.
    """
    global pid

    if is_btsync_active():
        return HttpResponseRedirect('/')

    # If the config file has the wrong structure, return an error
    if 'btsync_conf_file' not in config:
        return HttpResponse('Klucz btsync_conf_file nie istnieje w pliku konfiguracyjnym')

    # If the BTSync config file doesn't exist, create an empty one before loading it
    if not path.isfile(config['btsync_conf_file']):
        create_empty_btsync_config_file()

    # If the storage folder doesn't exist, create it
    btsync_conf = load_json(config['btsync_conf_file'])
    if not os.path.exists(btsync_conf['storage_path']):
        os.makedirs(btsync_conf['storage_path'])

    # Start BTSync process
    if platform.system() == 'Windows':
        pass                                # for the future
    elif platform.system() == 'Linux':
        pid = subprocess.Popen([config['btsync_exe_file'], '--config', config['btsync_conf_file']])
        while not is_btsync_active():
            pass  # busy-wait until the BTSync process starts answering

        if 'uid' not in config:
            config['uid'] = get_uid(config['btsync_server_address'])
            save_json(os.path.join(config['application_path'], 'config.json'), config)

    return HttpResponseRedirect('/')
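Note: every example on this page goes through small load_json/save_json helpers from each project's common module; their implementations are not shown here. A minimal sketch, assuming plain UTF-8 JSON files (argument order varies between projects; compare Example #15, which calls save_json(data, path)):

import json

def load_json(path):
    # Assumed helper: read a JSON file and return the parsed object.
    with open(path, 'r', encoding='utf-8') as fd:
        return json.load(fd)

def save_json(path, data):
    # Assumed helper: write `data` to a JSON file, pretty-printed.
    with open(path, 'w', encoding='utf-8') as fd:
        json.dump(data, fd, ensure_ascii=False, indent=4)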
Example #2
def edit_comment(request):
    """
        Edit an existing comment. Returns an empty JSON response.
        Keys in request.body:
            comment - dict with new comment's data. Keys:
                comment - new content which will replace the old one
                timestamp - timestamp of the moment first version of comment was created
                uid - ID of comment's author
            fullthreadpath - full path of thread directory where the comment files are stored
    """
    try:
        if not check_connection():
            return HttpResponseServerError('Brak połączenia z Internetem.')

        data = json.loads(request.body)
        edited_comment = data['comment']
        comment_path = os.path.join(data['fullthreadpath'],
                                    edited_comment['timestamp'] + file_name_separator + edited_comment['uid'])

        # get old comment and perform changes
        comment = load_json(comment_path)
        comment['history'].append({'timestamp': get_timestamp(),
                                   'comment': edited_comment['comment']})
        comment['comment'] = edited_comment['comment']

        save_json(comment_path, comment)

        return JsonResponse({}, safe=False)
    except Exception:
        return HttpResponseServerError('Wystąpił nieznany błąd podczas edycji komentarza.')
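For reference, a request body matching the handler above might look like this (a hypothetical payload; all values are illustrative):

payload = {
    'comment': {
        'comment': 'corrected text',    # new content replacing the old one
        'timestamp': '1428070000',      # creation time of the original comment
        'uid': 'user-123'               # comment author's ID
    },
    'fullthreadpath': '/shared/.Comments/docs/1428070000_user-123'
}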
Example #3
def do_diff_langfile(args):
    """Calculate a pack file given two lang files."""
    from_json = common.load_json(args.fileorig)
    to_json = common.load_json(args.filetrans)
    if "filename" in args and args.filename is not None:
        file_path = args.filename.split('/')
    else:
        file_path = ["lang", "sc", os.path.basename(args.fileorig)]
    result = {}
    # This is arbitrary. "foobar" or "en_US" would also work.
    from_locale = args.from_locale
    iterator = common.walk_langfile_json({from_locale: from_json}, [], [])
    for langlabel, dict_path, _ in iterator:
        text = common.get_data_by_dict_path(to_json, dict_path)
        if text is None:
            continue
        trans = {"orig": langlabel[from_locale], "text": text}

        result[common.serialize_dict_path(file_path, dict_path)] = trans
    # we are already sorted by game order
    if args.sort_order == "alpha":
        result = common.sort_dict(result)
    if args.resultfile == '-':
        common.save_json_to_fd(sys.stdout, result)
    else:
        common.save_json(args.resultfile, result)
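Each entry of the resulting pack pairs the original text with its translation under a serialized file/dict path key. A single entry might look like this (the key format is an assumption based on serialize_dict_path's name; the strings are made up):

# {
#     "lang/sc/gui.en_US.json/labels/title": {
#         "orig": "New Game",       # text from args.fileorig
#         "text": "Neues Spiel"     # text from args.filetrans
#     }
# }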
Example #4
def delete_folder(request):
    """
        Remove a shared folder from BitTorrent Sync and config.json, but not from the file system.
    """
    try:
        data = json.loads(request.body)

        js = json.loads(
            requests.get(
                "http://" + config["btsync_server_address"] + "/api",
                params={"method": "remove_folder", "secret": data["secret"]},
                auth=("team", "sync"),
            ).text
        )

        if js["error"] == 0:
            # popping identity from list of our identities in config file
            if data["secret"] in config["identities"]:
                config["identities"].pop(data["secret"])

            # saving config file
            save_json(config["application_path"] + "/config.json", config)

            return HttpResponse("Usunięto folder.")

        return HttpResponseServerError("Wystąpił błąd podczas usuwania folderu.")
    except Exception:
        return HttpResponseServerError("Wystąpił nieznany błąd podczas usuwania folderu.")
Example #5
def main():
    with open(sys.argv[1], 'r') as raw_html:
        soup = BeautifulSoup(raw_html, 'lxml')

    uwagi_hide_list = soup.find_all('tr', class_='uwagi_hide')
    for uwagi_hide in uwagi_hide_list:
        uwagi_hide.extract()

    group_tag = soup.find_all('b', string=re.compile("Grupy zajęciowe"))[0]
    group_tag_parent = group_tag.parent
    info_table = group_tag_parent.find_all('table', recursive=False)[-2]
    info_tr_list = info_table.find_all('tr', recursive=False)[3:-1]

    group_repo = load_json(sys.argv[2])

    for line_1, line_2, line_3 in chunks(info_tr_list, 3):
        line_1_td_list = line_1.find_all('td', recursive=False)

        #print("Kod grupy: {}".format(line_1_td_list[0].text.strip()))
        group_code = line_1_td_list[0].text.strip()
        #print("Kod kursu: {}".format(line_1_td_list[1].text.strip()))
        course_code = line_1_td_list[1].text.strip()

        line_2_td_list = line_2.find_all('td', recursive=False)
        # print("Prowadzący: {}".format(
        #    re.sub(r'\s+', ' ', line_2_td_list[0].text.strip())))
        #profesor_name = re.sub(r'\s+', ' ', line_2_td_list[0].text.strip())

        line_3_table_td_list = line_3.find('table').find_all('td')
        time_list = [td.text for td in line_3_table_td_list]
        formatted_time_list = []

        for time_element in time_list:
            hours = re.findall(re.compile(r'\d\d:\d\d'), time_element)
            dow_text = time_element.strip()[:2]
            dow_num = DOW_TEXT_NUM_DICT[dow_text]
            ftw_dict = {
                'start': hours[0].replace(':', ''),
                'end': hours[1].replace(':', ''),
                'dow': dow_num
            }
            par_ind = time_element.strip()[3:5]
            if par_ind in ('TP', 'TN'):
                ftw_dict['par'] = 1 if par_ind == 'TN' else 2
            formatted_time_list.append(ftw_dict)

        if 'courses' not in group_repo:
            group_repo['courses'] = {}

        if course_code not in group_repo['courses']:
            course_dict = {}
            group_repo['courses'][course_code] = course_dict
        else:
            course_dict = group_repo['courses'][course_code]

        course_dict[group_code] = formatted_time_list

    save_json(sys.argv[2], group_repo)
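Each entry of formatted_time_list built above is a small dict. Assuming a source cell such as 'wt TN 07:30-09:00' (Polish day abbreviation, optional even/odd-week marker, hour range), the result would be:

# Illustrative entry; DOW_TEXT_NUM_DICT is assumed to map 'wt' to 2:
# {'start': '0730', 'end': '0900', 'dow': 2, 'par': 1}
# 'par' is only set when the 'TP'/'TN' biweekly marker is present.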
Example #6
def do_make_mapfile(args):
    """Create a default mapfile with a one file to one file mapping."""
    json = common.load_json(args.bigpack)
    result = {}
    prefix = args.prefix
    if prefix and not prefix.endswith('/'):
        prefix += '/'
    for file_dict_path_str in json.keys():
        file_path, _ = common.unserialize_dict_path(file_dict_path_str)
        path = "/".join(file_path)
        result[path] = prefix + path
    common.save_json(args.mapfile, result)
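The mapfile written here is a flat JSON object mapping each file path found in the big pack to a target pack path. Assuming a prefix of "packs", one entry would be:

# { "lang/sc/gui.en_US.json": "packs/lang/sc/gui.en_US.json" }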
Example #7
def do_filter(args):
    """Filter a pack and write the result to another one"""
    sorter = get_sorter(args)
    walker = get_walker(args)

    walker.set_file_path_filter(args.filter_file_path)
    walker.set_dict_path_filter(args.filter_dict_path)
    walker.set_tags_filter(args.filter_tags)
    walker.set_orig_filter(args.filter_orig)

    pack = common.load_json(args.inputpack)
    new_pack = dict(walker.walk_pack(pack))
    new_pack = sorter(new_pack)
    common.save_json(args.outputpack, new_pack)
Example #8
def do_split(args):
    """Split a large packfile to multiple ones according to a mapfile"""
    sorter = get_sorter(args)

    big_pack = common.load_json(args.bigpack)
    map_file = common.load_json(args.mapfile)
    unused_map_files = set(map_file.keys())
    results = {}
    missings = {}
    error = False
    for file_dict_path_str, trans in big_pack.items():
        file_path, _ = common.unserialize_dict_path(file_dict_path_str)
        file_path_str = "/".join(file_path)
        to_file_str = map_file.get(file_path_str)
        if to_file_str is None:
            missings[file_path_str] = missings.get(file_path_str, 0) + 1
        else:
            unused_map_files.discard(file_path_str)
            results.setdefault(to_file_str, {})[file_dict_path_str] = trans

    for missing_file, count in missings.items():
        error = True
        print("missing pack reference for", missing_file,
              "(%d occurences)" % count)

    if error:
        print("Aborting...")
        sys.exit(1)

    for to_file_str, smaller_pack in results.items():
        to_file = to_file_str.split('/')[args.strip:]
        if not to_file:
            print("strip parameter", args.strip, "is too large for path",
                  to_file_str)
            print("Aborting...")
            sys.exit(1)

        actual_dir = os.path.join(args.outputpath, os.sep.join(to_file[:-1]))
        os.makedirs(actual_dir, exist_ok=True)
        smaller_pack = sorter(smaller_pack)
        common.save_json(os.path.join(actual_dir, to_file[-1]), smaller_pack)

    if unused_map_files:
        print(len(unused_map_files),
              "keys were not used in the map file, e.g.:",
              "\n".join(f for i, f in zip(range(10), unused_map_files)))
Example #9
def get_thread_comments(request):
    """
        Returns all comments in the specified thread and statistics data.
        Keys in request.body:
            fullthreadpath - full directory path to the folder with comment files
            sortinguid - if not empty, sort the comments by this user's 'readby' timestamp instead of the creation timestamp
    """
    result = []

    try:
        data = json.loads(request.body)
        full_thread_path = data['fullthreadpath']

        # get comment files and mark them as read by user
        for commentfile in os.listdir(full_thread_path):
            if commentfile == 'meta':
                continue

            comment = load_json(os.path.join(full_thread_path, commentfile))

            # marking as read (only rewrite the file when something actually changed)
            if config['uid'] not in comment['readby']:
                comment['readby'][config['uid']] = get_timestamp()
                save_json(os.path.join(full_thread_path, commentfile), comment)

            # needed for UI purposes
            comment['editing'] = False
            comment['historing'] = False

            result.append(comment)

        # if sortinguid is not empty, sort by that user's 'readby' timestamp instead
        if data['sortinguid']:
            # filter first: calling remove() while iterating over the same list skips elements
            result = [res for res in result if data['sortinguid'] in res['readby']]
            for res in result:
                res['timestamp'] = res['readby'][data['sortinguid']]

        # need to sort, because os.listdir doesn't return a sorted list of files :(
        result = sorted(result, key=lambda comm: comm['timestamp'])

        return JsonResponse({'comments': result, 'stats': get_stats(result)}, safe=False)
    except Exception:
        return HttpResponseServerError('Wystąpił nieznany błąd podczas pobierania komentarzy.')
Example #10
    def write_json(self, path):
        """Write a migration plan as json to a file"""
        unchanged = []
        delete = []
        migrate = {}
        for from_str, to_str in self.map.items():
            if to_str is None:
                unchanged.append(from_str)
            else:
                migrate[from_str] = to_str
        for _, _, file_dict_path_str, _ in self.src.iterate():
            delete.append(file_dict_path_str)

        unchanged.sort()
        delete.sort()
        migrate = common.sort_dict(migrate)

        json = {"migrate": migrate, "delete": delete, "unchanged": unchanged}
        common.save_json(path, json)
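The plan written here is exactly what do_migrate in Example #13 loads back. A minimal instance (the path strings are made up for illustration):

# {
#     "migrate":   {"old/file.json/path/a": "new/file.json/path/a"},
#     "delete":    ["old/file.json/path/gone"],
#     "unchanged": ["old/file.json/path/same"]
# }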
Example #11
    def __output_images(self):
        tmp_path = self.output_folder + "/tmp"
        path = self.output_folder + "/images"
        common.prepare_clean_dir(tmp_path)
        common.prepare_clean_dir(path)
        for segment in self.json_data["segments"]:
            for record in segment["records"]:
                for i, image in enumerate(record["images"]):
                    try:
                        file_name = "%s_%s" % (record["record_id"], i)
                        source_file_name_only = tmp_path + "/" + file_name
                        original_extension = image["src"].split('/')[-1].split(
                            '.')[-1].split("?")[0]
                        source_file_name = source_file_name_only + "." + original_extension
                        target_file_name = path + "/" + file_name + "." + setting.OUTPUT_IMAGE_TYPE

                        r = requests.get(image["src"],
                                         stream=True,
                                         headers={'User-agent': 'Mozilla/5.0'})
                        if r.status_code == 200:
                            with open(source_file_name, 'wb') as f:
                                r.raw.decode_content = True
                                shutil.copyfileobj(r.raw, f)
                        else:
                            continue

                        # composite the (possibly transparent) image onto its background color
                        r, g, b = [int(a) for a in image["bg_color"].split(",")]
                        im = Image.open(source_file_name).convert('RGBA')
                        bg = Image.new("RGB", im.size, (r, g, b))
                        bg.paste(im, mask=im)  # the image's alpha channel acts as the paste mask
                        im = bg
                        im.save(target_file_name)

                        image["path"] = target_file_name
                    except Exception:
                        pass  # skip images that fail to download or convert

        common.save_json(self.output_folder + "/result.json",
                         self.json_data,
                         encoding=setting.OUTPUT_JSON_ENCODING)

        shutil.rmtree(tmp_path)
Example #12
def do_merge(args):
    """Merge multiple pack files into one big pack file."""
    sorter = get_sorter(args)

    big_result = {}
    error = False
    for usable_path, _ in common.walk_files(args.inputpath):
        for file_dict_path_str, value in common.load_json(usable_path).items():
            if big_result.setdefault(file_dict_path_str, value) != value:
                print("Multiple different value found for", file_dict_path_str)
                error = True
    if error:
        if args.allow_mismatch:
            print("Continuing anyway...")
        else:
            print("Aborting...")
            sys.exit(1)
    big_result = sorter(big_result)

    common.save_json(args.bigpack, big_result)
Example #13
def do_migrate(args):
    """Migrate one or more pack file according to a migration file."""
    sorter = get_sorter(args)
    sparse_reader = get_sparse_reader(args)
    plan = common.load_json(args.migration_plan)
    plan = types.SimpleNamespace(to_delete=set(plan["delete"]),
                                 unchanged=set(plan["unchanged"]),
                                 migrate=plan["migrate"])
    iterator = common.transform_file_or_dir(args.inputpath, args.outputpath)
    for input_file, output_file, _ in iterator:
        try:
            src_pack = common.load_json(input_file)
        except OSError as error:
            print("Cannot read", input_file, ":", str(error))
            continue
        except ValueError as error:
            print("File", input_file, "contains invalid JSON:", str(error))
            continue

        dst_pack = migrate_pack(args, plan, sparse_reader, src_pack)

        common.save_json(output_file, sorter(dst_pack))
Example #14
def write_comment(request):
    """
        Take the comment text from the form and create a comment file inside the thread.
        Keys in request.body:
            comment - text of comment
            fullthreadpath - full path of thread directory where the comment files are stored
    """
    try:
        if not check_connection():
            return HttpResponseServerError('Brak połączenia z Internetem.')

        data = json.loads(request.body)

        if len(data['comment']) == 0:
            return HttpResponseServerError('Treść komentarza nie może być pusta.')

        # obtaining timestamp
        timestamp = get_timestamp()

        # creating new comment
        new_comment = {
            'uid': config['uid'],
            'timestamp': timestamp,
            'readby': { config['uid']: timestamp },
            'comment': data['comment'],
            'history': [{'timestamp': timestamp,
                         'comment': data['comment']}]
        }

        # saving comment to new file with format of comment_file_name_pattern (see common.py)
        save_json(os.path.join(data['fullthreadpath'], new_comment['timestamp'] + file_name_separator + config['uid']),
                  new_comment)

        return JsonResponse(new_comment, safe=False)
    except Exception:
        return HttpResponseServerError('Wystąpił nieznany błąd podczas dodawania komentarza.')
Example #15
    
    # rel = 'AtLocation'
    output_data_file_name = os.path.join(root_dirs, 'csqa_data', 'conceptnet','9_rels', '{}_data.json'.format(task))
    mkdir_if_notexist(output_data_file_name)
    # wnlemer = WordNetLemmatizer()
    
    datas = load_jsonl(data_file)
    print('loading conceptnet ...')
    conceptnet = load_rearranged_conceptnet(conceptnet_dir)
    print(f'conceptnet keys: {len(conceptnet)}')
    print('-----\n')
    
    
    cases = enrich(datas, conceptnet)
    print(len(cases))
    save_json(cases, output_data_file_name)

# dev_data
# AtLocation - ratio of examples where no triples were found: 4489/7326
# Causes 6615/7326
# CapableOf 4976/7326
# Antonym 6347/7326
# HasSubevent 6466/7326
# HasPrerequisite 6403/7326
# CausesDesire 6492/7326
# Desires 7104/7326
# PartOf 5770/7326
# HasProperty 5355/7326

# train_data
# AtLocation - ratio of examples where no triples were found: 36314/58446
Example #16
    def save_to_file(self, filename):
        json = {}
        for key in self.default_options.keys():
            json[key] = getattr(self, key)
        common.save_json(filename, json)
Example #17
def main(unused_argv):
    pp = pprint.PrettyPrinter(indent=2, compact=True)

    # Load training and eval data
    (train_x, train_y), (test_x, test_y) = common.load_original_mnist()

    print(train_x.shape, train_x.dtype, train_y.shape, train_y.dtype)
    print(test_x.shape, test_x.dtype, test_y.shape, test_y.dtype)

    def train_model(classifier, log_stats=True):
        start_time = time.time()

        # Train the model
        # profiler_hook = tf.train.ProfilerHook(save_steps=50, output_dir=MODEL_DIR + '/train')

        train_input_fn = tf.estimator.inputs.numpy_input_fn(
            x={"x": train_x},
            y=train_y,
            batch_size=TRAINING_BATCH_SIZE,
            num_epochs=None,
            shuffle=True)
        classifier.train(
            input_fn=train_input_fn,
            steps=TRAINING_STEPS,
            # hooks=[profiler_hook]
        )
        duration = round(time.time() - start_time, 3)

        if log_stats:
            print("Training duration: " + common.duration_to_string(duration))

        return duration

    def eval_model(classifier, log_stats=True):
        start_time = time.time()

        tensors_to_log = {
            # "probabilities": "softmax_tensor",
            "pred": "diff"
        }
        logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log,
                                                  every_n_iter=1)

        eval_input_fn = tf.estimator.inputs.numpy_input_fn(
            x={"x": test_x},
            y=test_y,
            batch_size=EVAL_BATCH_SIZE,
            shuffle=False)
        result = classifier.evaluate(
            input_fn=eval_input_fn,
            steps=EVAL_STEPS,
            # hooks=[logging_hook]
        )
        duration = round(time.time() - start_time, 3)

        if log_stats:
            print("Training duration: " + common.duration_to_string(duration))
            print("Eval result:", result)

        return result, duration

    model_stats_map = {}
    for conf_name, config in model_configs.items():

        # if config["skip"]:
        #     continue

        print("RUN CONFIG: %s" % conf_name)
        model_dir = os.path.join(MODEL_DIR, conf_name)

        # common.clean_dir(model_dir)

        mnist_classifier = tf.estimator.Estimator(model_fn=model_fn,
                                                  model_dir=model_dir,
                                                  params=config)

        eval_results = []
        total_train_duration = 0
        total_eval_duration = 0
        for _ in range(TRAINING_EPOCHS):
            # train_duration = train_model(mnist_classifier)
            # total_train_duration += train_duration

            eval_result, eval_duration = eval_model(mnist_classifier)
            eval_results.append(eval_result)
            total_eval_duration += eval_duration

        final_result = common.get_final_eval_result(eval_results)

        print("Eval results:")
        pp.pprint(eval_results)
        model_stats_map[conf_name] = {
            "model_details": model_details,
            "final_result": final_result,
            "total_train_duration":
            common.duration_to_string(total_train_duration),
            "total_eval_duration":
            common.duration_to_string(total_eval_duration),
        }
        common.save_pickle(
            model_stats_map[conf_name],
            os.path.join(model_details["model_dir"], "last_result.pkl"))
        common.save_json(
            model_stats_map[conf_name],
            os.path.join(model_details["model_dir"], "last_result.json"))

        print("Total training duration: " +
              common.duration_to_string(total_train_duration))
        print("Total eval duration: " +
              common.duration_to_string(total_eval_duration))

    print("Models results:")
    pp.pprint(model_stats_map)
Example #18
for badword, count in itertools.islice(iterator, 30):
    print(badword, "(%d)" % count)


# hack: save another pack with errors as note, with quality 'spell'.

patch_me = common.load_json(settings["packfile"])
for file_dict_path_str, errors in grammar_checker.all_the_errors.items():
    grammar_errors = []
    for severity, error, text in errors:
        if error == "grammar error" or error == "spelling error":
            grammar_errors.append(text)

    if not grammar_errors:
        continue

    entry = patch_me[file_dict_path_str]

    end_note = entry.get("note", "")
    if entry.get("quality"):
        end_note = "%s: %s" % (entry["quality"], end_note)

    if end_note:
        grammar_errors.append(end_note)

    entry["quality"] = "spell"
    entry["note"] = "\n".join(grammar_errors)

common.save_json(settings["packfile"]+".spellchecked", patch_me)
Example #19
            content = io.read()

        info = common.hexo_info(content)

        link = '/%04d/%02d/%02d/%s/' % (
            info['date'].year,
            info['date'].month,
            info['date'].day,
            name,
        )

        data = {
            'site': config['site'],
            'username': config['username'],
            'repo': config['repo'],
            'token': config['token'],
            'title': info['title'],
            'labels': ['gitment', link],
            'body': '%s%s' % (config['site'], link)
        }

        if check.get(link) is None:
            r_check = api.check_exist(config['username'], config['repo'],
                                      data['labels'])
            if len(r_check) == 0:
                api.new_issue(**data)
                print('Create issue %s.' % data['title'])
            check[link] = True

        common.save_json(F_CHECK, check)
Example #20
def add_folder(request):
    """
        Add a new folder to the file system and to BitTorrent Sync.
    """
    try:
        # receiving keys: path, secret, identity
        data = json.loads(request.body)

        # checking if user passed no identity
        if not data["identity"]:
            return HttpResponseServerError("Pole tożsamości jest puste.")

        # checking if user passed too long identity
        if len(data["identity"]) > 25:
            return HttpResponseServerError("Zbyt długa tożsamość.")

        # checking if dir exists
        if not os.path.isdir(data["path"]):
            return HttpResponseServerError("Wybrany folder nie istnieje.")

        # checking if dir is empty
        if os.listdir(data["path"]):
            return HttpResponseServerError("Wybrany folder nie jest pusty.")

        # checking secret
        if len(data["secret"]) != 33 and len(data["secret"]) != 0:
            return HttpResponseServerError("Wpisany secret ma złą długość.")

        # getting secret
        if not data["secret"]:
            js = json.loads(
                requests.get(
                    "http://" + config["btsync_server_address"] + "/api",
                    params={"method": "get_secrets"},
                    auth=("team", "sync"),
                ).text
            )

            if "read_write" not in js:
                return HttpResponseServerError("Wystąpił błąd podczas uzyskiwania secreta.")

            data["secret"] = js["read_write"]

        # adding folder
        js2 = json.loads(
            requests.get(
                "http://" + config["btsync_server_address"] + "/api",
                params={"method": "add_folder", "dir": data["path"], "secret": data["secret"]},
                auth=("team", "sync"),
            ).text
        )

        if js2["error"] != 0:
            return HttpResponseServerError("Wystąpił błąd podczas dodawania folderu.")

        # updating config
        config["identities"][data["secret"]] = data["identity"]
        save_json(config["application_path"] + "/config.json", config)

        # creating .Comments directory
        if not os.path.isdir(data["path"] + "/.Comments"):
            os.makedirs(data["path"] + "/.Comments")

        # creating .Users directory
        if not os.path.isdir(data["path"] + "/.Users"):
            os.makedirs(data["path"] + "/.Users")

        # adding usr to .Users directory
        save_json(
            data["path"] + "/.Users/" + config["uid"] + ".json",
            {
                "uid": config["uid"],
                "identity": data["identity"]
                # todo(future) another config goes here for example color of comments etc.
            },
        )

        return HttpResponse("Dodano folder.")
    except Exception:
        return HttpResponseServerError("Wystąpił nieznany błąd podczas dodawania folderu.")
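Taken together, Examples #1, #4 and #20 imply that the application's config.json carries at least the following keys (a sketch with illustrative values; other keys may exist):

# {
#     "application_path": "/opt/teamsync",
#     "btsync_exe_file": "/usr/bin/btsync",
#     "btsync_conf_file": "/opt/teamsync/btsync.conf",
#     "btsync_server_address": "127.0.0.1:8888",
#     "uid": "user-123",
#     "identities": {"<33-character secret>": "Alice"}
# }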
Example #21
    def __output(self):

        segids = []
        rid = 0

        segs = dict()
        for i, block in enumerate(self.blocks):
            # texts
            texts, images, links, cssselectors = [], [], [], []

            for node in block:
                # extract text from node
                for text in node.stripped_strings:
                    texts.append(text)
                # extract text from node -- end

                # extract images in css background
                background_image_urls = self.__get_css_background_image_urls(
                    node)
                for url in background_image_urls:
                    dict_img = dict()
                    dict_img["alt"] = ""
                    dict_img["src"] = urljoin(self.url, url)
                    r, g, b = self.__get_css_background_color(node)
                    dict_img["bg_color"] = "%d,%d,%d" % (r, g, b)
                    images.append(dict_img)
                # extract images in css background -- end

                # extract images in <img> element
                for img in node.find_all("img"):
                    dict_img = dict()
                    if "src" in img.attrs:
                        dict_img["src"] = urljoin(self.url, img["src"])
                    if "alt" in img.attrs:
                        dict_img["alt"] = img["alt"]
                    r, g, b = self.__get_css_background_color(img)
                    dict_img["bg_color"] = "%d,%d,%d" % (r, g, b)
                    images.append(dict_img)
                # extract images in <img> element -- end

                # extract hyperlink from node
                for link in node.find_all("a"):
                    if "href" in link.attrs:
                        links.append({"href": urljoin(self.url, link["href"])})
                # extract hyperlink from node -- end

                cssselectors.append(self.__get_css_selector(node))

            if len(texts) == 0 and len(images) == 0:
                continue

            lid = block[0]["lid"]

            if lid not in segids:
                segids.append(lid)
            sid = str(segids.index(lid))

            if sid not in segs:
                segs[sid] = {
                    "segment_id": int(sid),
                    "css_selector": self.__get_css_selector(block[0].parent),
                    "records": []
                }

            segs[sid]["records"].append({
                "record_id": rid,
                "texts": texts,
                "images": images,
                "css_selector": cssselectors,
                "links": links
            })
            rid += 1

        self.json_data = dict()
        self.json_data["segments"] = [value for key, value in segs.items()]
        self.json_data["url"] = self.url
        self.json_data["title"] = self.browser.title

        common.save_json(self.output_folder + "/result.json",
                         self.json_data,
                         encoding=setting.OUTPUT_JSON_ENCODING)
Example #22
    def __output(self):

        segids = []
        rid = 0

        segs = dict()
        for i, block in enumerate(self.blocks):
            # texts
            texts, images, links, cssselectors, location, size = [], [], [], [], [], []

            for node in block:
                # extract text from node
                for text in node.stripped_strings:
                    texts.append(text)
                # extract text from node -- end

                # extract images in css background
                background_image_urls = self.__get_css_background_image_urls(
                    node)
                for url in background_image_urls:
                    dict_img = dict()
                    dict_img["alt"] = ""
                    dict_img["src"] = urljoin(self.url, url)
                    r, g, b = self.__get_css_background_color(node)
                    dict_img["bg_color"] = "%d,%d,%d" % (r, g, b)
                    images.append(dict_img)
                # extract images in css background -- end

                # extract images in <img> element
                for img in node.find_all("img"):
                    dict_img = dict()
                    if "src" in img.attrs:
                        dict_img["src"] = urljoin(self.url, img["src"])
                    if "alt" in img.attrs:
                        dict_img["alt"] = img["alt"]
                    r, g, b = self.__get_css_background_color(img)
                    dict_img["bg_color"] = "%d,%d,%d" % (r, g, b)
                    images.append(dict_img)
                # extract images in <img> element -- end

                # extract hyperlink from node
                for link in node.find_all("a"):
                    if "href" in link.attrs:
                        links.append({"href": urljoin(self.url, link["href"])})
                # extract hyperlink from node -- end

                cssselectors.append(self.__get_css_selector(node))
                location.append(
                    self.__get_location_by_css(self.__get_css_selector(node)))
                size.append(self.__get_size(self.__get_css_selector(node)))

            if len(texts) == 0 and len(images) == 0:
                continue

            lid = block[0]["lid"]

            if lid not in segids:
                segids.append(lid)
            sid = str(segids.index(lid))

            if sid not in segs:
                segs[sid] = {
                    "segment_id": int(sid),
                    "css_selector": self.__get_css_selector(block[0].parent),
                    "records": []
                }

            segs[sid]["records"].append({
                "record_id": rid,
                "texts": texts,
                "images": images,
                "css_selector": cssselectors,
                "links": links,
                "location": location,
                "size": size
            })
            rid += 1

        self.json_data = dict()
        self.json_data["segments"] = [value for key, value in segs.items()]
        self.json_data["url"] = self.url
        self.json_data["title"] = self.browser.title

        common.save_json(self.output_folder + "/result.json",
                         self.json_data,
                         encoding=setting.OUTPUT_JSON_ENCODING)
Example #23
def write_new_thread(request):
    """
        Prepare file system (create directory etc.) for comment files and create the first one.
        Keys in request.body:
            insidepath - relative path inside shared folder
            folderpath - path of shared folder
            topic - text of thread's topic
            fileabout - if thread is a discussion about some shared file, this indicates which file it is (only name
                        of the file, not full path)
            comment - text of first comment
    """
    try:
        if not check_connection():
            return HttpResponseServerError('Brak połączenia z Internetem.')

        # getting and preparing data
        timestamp = get_timestamp()
        data = json.loads(request.body)

        # generating full path of a thread directory
        full_thread_path = os.path.join(data['folderpath'],
                                        '.Comments',
                                        data['insidepath'][1:],
                                        timestamp + file_name_separator + config['uid'])

        if os.path.isdir(full_thread_path):
            return HttpResponseServerError('Podany wątek już istnieje.')

        # thread dict sent to GUI
        response = {
            'timestamp': timestamp,
            'name': data['topic'],
            'type': 'thread',
            'numberofcomments': 1,
            'unreadcomment': False,
            'lastcomment': timestamp,
            'fullpath': full_thread_path,
            'insidepath': full_thread_path.replace(data['folderpath'] + '/.Comments', ''),
        }

        # data stored in file system as 'meta' file
        meta = {
            'uid': config['uid'],
            'timestamp': timestamp,
            'topic': data['topic'],
            'fileabout': '' if data['fileabout'] == "<brak>" else os.path.join(data['insidepath'], data['fileabout'])
        }

        # data stored in comment file
        comment = {
            'uid': config['uid'],
            'timestamp': timestamp,
            'comment': data['comment'],
            'readby': { config['uid']: timestamp },
            'history': [{ 'timestamp': timestamp,
                          'comment': data['comment'] }]
        }

        # creating thread's directory and writing data to files
        os.makedirs(full_thread_path)
        save_json(os.path.join(full_thread_path, 'meta'), meta)
        save_json(os.path.join(full_thread_path, timestamp + file_name_separator + config['uid']), comment)

        return JsonResponse(response, safe=False)
    except Exception:
        return HttpResponseServerError('Wystąpił nieznany błąd podczas dodawania nowego wątku.')
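Putting Examples #2, #9, #14 and #23 together, a thread is a directory named <timestamp><file_name_separator><uid> under .Comments, holding a 'meta' file plus one JSON file per comment. A sketch of the layout (the separator is shown as '_' for readability; its real value lives in the project's common.py):

# <sharedfolder>/.Comments/<insidepath>/1428070000_user-123/
#     meta                   # topic, author uid, timestamp, fileabout
#     1428070000_user-123    # first comment
#     1428072500_user-456    # a reply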