Example #1
def main(args):
    if not os.path.exists(args.server_stream):
        Log.error("server_stream file does not exist.")
        return

    Log.info("Start parsing iSmartAlarm diagnotics stream...")

    isap = ISADiagnoticsStreamParser(args.server_stream)
    unstructured_log = isap.get_unstructured_log()
    sensor_log = isap.get_sensor_log()

    with Elastic(index='unstructured_log',
                 doc_type='unstructured_log') as elastic:
        datetime_log = []

        for log in unstructured_log:
            if log.get('data_type') == 'datetime':
                datetime_log.append(log)
        elastic.upload(datetime_log, 'data')

    with Elastic(index='sensor_log', doc_type='sensor_log') as elastic:
        elastic.upload(sensor_log, 'datetime')

    Log.info("Successfully upload server_stream data.")

    del isap
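
For context, a hedged sketch of the argparse wiring this main() presumably sits behind; the flag name mirrors the args.server_stream attribute the function reads, but the exact CLI is an assumption.

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Parse an iSmartAlarm diagnostics stream")
    # flag name mirrors args.server_stream above; the exact CLI is an assumption
    parser.add_argument("--server_stream", required=True,
                        help="path to the server_stream file")
    main(parser.parse_args())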
Example #2
    def scan(self, url):
        """Scan and crawl url which user requested."""
        Log.i("Trying to crawl {} url".format(url))

        domain = urlparse(url).netloc
        obj = DynamicObject()

        # Step 1. Visit website using headless tor browser
        Log.d("Step 1. Visiting {} website using headless browser".format(url))

        browser = HeadlessBrowser(ini=self.ini, tor_network=True)

        report = browser.run(url)

        del browser

        # if the browser raised an exception, return the empty object
        if not report:
            return obj

        obj.webpage = report

        # Step 2. Scan common service port
        Log.d(
            "Step 2. Scanning {} domain's common service port".format(domain))
        obj.port = self._portscan(domain)

        # Step 3. TO-DO

        return obj
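
A hypothetical call site for scan(); the class name Scanner and the ini object are assumptions, not names confirmed by this listing.

scanner = Scanner(ini=ini)                  # class name is an assumption
obj = scanner.scan("http://example.onion")  # URL is illustrative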
Example #3
    def run(self, url):
        try:
            self.driver.get(url)
        except Exception:
            # browser scan failed
            Log.e("Browser raised an error while loading the page.")
            return

        # stop if the driver returned no page source
        if not self.get_source():
            return

        # run the HTML parser to extract data from the page source
        try:
            # BeautifulSoup object for parsing the HTML source
            self.soup = BeautifulSoup(self.driver.page_source, 'html.parser')
        except Exception:
            # website source code is not HTML
            Log.e("Invalid HTML source code.")
            return

        # get HAR from driver
        self.har = json.loads(self.driver.get_log('har')[0]['message'])

        report = DynamicObject({
            'url': url,
            'domain': urlparse(url).netloc,
            'title': self.get_title(),
            'screenshot': self.get_screenshot(),
            'source': self.get_source(),
            'sublinks': self.get_sublinks(),
            'language': self.get_language(),
            'headers': self.get_headers(),
            'tree': self.get_website_tree(),
        })

        return report
Example #4
    def save(self):
        """
        Save domain on database and request crawling.
        :return: None
        """
        engine = Engine.create(self.ini)
        with Session(engine=engine) as session:
            # iterate over a copy: the finally clause mutates self.urls
            for url in list(self.urls):
                task_id = uuid4().hex

                try:
                    # add url into database
                    session.add(Domain(uuid=task_id, url=url))
                    session.commit()

                    task = run_crawler.apply_async(args=(url, ),
                                                   task_id=task_id)
                    Log.i("Crawler issued a new task id {} at {}".format(
                        task.task_id, url))
                except Exception:
                    Log.d(
                        "The url {} is already saved in the database.".format(url))
                finally:
                    self.urls.remove(url)
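
For apply_async(args=(url, ), task_id=task_id) to work, run_crawler must be a Celery task. A minimal sketch of such a declaration, assuming a Celery app with a Redis broker and a hypothetical load_ini() helper; none of this wiring is confirmed by the listing.

from celery import Celery

app = Celery("crawler", broker="redis://localhost:6379/0")  # broker URL assumed

@app.task
def run_crawler(url):
    # Scanner and load_ini are hypothetical stand-ins for the project's
    # actual crawler class and ini loading
    return Scanner(ini=load_ini()).scan(url)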
Example #5
def train(X_train,
          X_val,
          y_train,
          y_val,
          train_config: dict = train_config,
          global_config: dict = global_config,
          save_model: bool = True):

    # create paths if necessary
    for path in global_config.values():
        create_dirs(path)

    # model name and path
    name = "_".join([train_config["DATE"], train_config["SESSION_NAME"]])
    model_path = os.path.join(global_config["WEIGHT_DIR"], name)

    # instantiate model
    model = train_config["MODEL"](**train_config["MODEL_CONFIG"])

    # fit to training data
    model.fit(X_train, y_train)

    # dump model to disk
    if save_model:
        joblib.dump(model, model_path + ".joblib")

    # log metrics to csv
    train_predictions = model.predict(X_train)
    val_predictions = model.predict(X_val)

    log_content = train_config.copy()
    log_content["TRAIN_LOSS"] = train_config["LOSS"](y_train,
                                                     train_predictions)
    log_content["VAL_LOSS"] = train_config["LOSS"](y_val, val_predictions)
    log_content["TRAIN_METRICS"] = {}
    log_content["VAL_METRICS"] = {}

    for key, metric in train_config["METRICS"].items():
        log_content["TRAIN_METRICS"] = metric(y_train, train_predictions)
        log_content["VAL_METRICS"][key] = metric(y_val, val_predictions)

    log_path = os.path.join(global_config["LOG_DIR"], train_config["LOGFILE"])
    write_log(log_path, log_content)

    # log metrics to mlflow
    logger = Log(train_config=train_config,
                 run_name=train_config["SESSION_NAME"])
    logger.log_metric("Train Loss", log_content["TRAIN_LOSS"])
    logger.log_metric("Validation Loss", log_content["VAL_LOSS"])

    # return validation loss
    return log_content["VAL_LOSS"]
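
A minimal sketch of the two config dicts train() reads; the keys are exactly those the function body accesses, while the concrete values (scikit-learn model, metrics, paths) are illustrative assumptions.

from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score

global_config = {
    "WEIGHT_DIR": "weights",  # model dumps go here
    "LOG_DIR": "logs",        # csv logs go here
}

train_config = {
    "DATE": "2020-01-01",
    "SESSION_NAME": "baseline",
    "MODEL": Ridge,                  # any estimator with fit/predict
    "MODEL_CONFIG": {"alpha": 1.0},  # kwargs for the model constructor
    "LOSS": mean_squared_error,
    "METRICS": {"r2": r2_score},
    "LOGFILE": "train_log.csv",
}

# val_loss = train(X_train, X_val, y_train, y_val, train_config, global_config)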
Example #6
    def save(self, merge):
        """Convert and save into a playable video."""
        Log.info("Converting video file codec format...")

        for video in self.rawvideos:
            base, _ = os.path.splitext(video)
            os.system(f"ffmpeg -f h264 -r 10 -i {video} -c copy {base}.mp4")

            # remove the original file
            if os.path.exists(video):
                os.remove(video)

        Log.info("Successfully converted the video file codec.")

        if merge:
            Log.info("Merging videos...")

            # the concat protocol cannot join mp4 containers, so write the
            # file list that the concat demuxer expects instead
            videos = [os.path.splitext(video)[0] + ".mp4" for video in self.rawvideos]
            with open("videos.txt", "w") as f:
                f.writelines(f"file '{video}'\n" for video in videos)

            os.system("ffmpeg -f concat -safe 0 -i videos.txt -c copy video.mp4")

            for video in videos:
                os.remove(video)
            os.remove("videos.txt")

            Log.info(f"Successfully merged {len(self.rawvideos)} videos.")
Example #7
def run(source):
    _class = source()
    status = _class.active

    if _class.active:
        Log.i("Trying to run {} source".format(_class.name))
        try:
            _class.collect()
        except Exception:
            Log.e("Failed to collect data from {} source".format(_class.name))
        if _class.urls:
            _class.save()
    else:
        Log.i("{} source is now disabled".format(_class.name))

    del _class

    return status
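
A minimal source stub satisfying the interface run() relies on (name, active, collect(), urls, save()); the class and URL below are illustrative, not project code.

class DummySource:
    name = "dummy"
    active = True

    def __init__(self):
        self.urls = []

    def collect(self):
        self.urls.append("http://example.onion")

    def save(self):
        Log.i("Saving {} urls".format(len(self.urls)))

run(DummySource)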
Example #8
    def collect(self):
        Log.d("Start collecting from freshonion API")
        response = HTTP.request(
            url='http://zlal32teyptf4tvi.onion/json/all',
            tor_network=True,
            ini=self.ini
        )

        if not response:
            Log.e("Exception accrued while loading website.")
            return

        if response.status_code == 200:
            rows = response.json()
            Log.i("{} url detected from freshonion".format(len(rows)))

            for row in rows:
                url = self._get_formed_url(row)
                if url not in self.urls:
                    self.urls.append(url)
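
The _get_formed_url helper is not shown in this listing. A hedged sketch of what it might do, assuming each freshonion row exposes the hidden-service hostname and port under 'onion' and 'port' keys; both field names are assumptions about the API response.

    def _get_formed_url(self, row):
        # 'onion' and 'port' field names are assumptions about the JSON rows
        url = "http://{}.onion".format(row['onion'])
        if row.get('port') and row['port'] != 80:
            url = "{}:{}".format(url, row['port'])
        return url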
Example #9
def test_write_debug():
    Log.d("Test Debugging Message")
Example #10
    def __init__(self, ini):
        Log.i("Starting crawler")
        self.ini = ini
Example #11
    def _portscan(self, domain):
        """Scan and check which common service ports are open."""
        sock = Socket(
            tor_network=True,
            ini=self.ini,
        )

        # well-known service ports: FTP (20/21), SSH (22), Telnet (23),
        # SMTP (25), HTTP (80), POP3 (110), NTP (123), IMAP (143), IRC (194),
        # LDAP (389), HTTPS (443), IMAPS (993), MySQL (3306), RDP (3389),
        # XMPP (5222), public IRC (6667), OnionCat (8060), Bitcoin (8333)
        ports = [20, 21, 22, 23, 25, 80, 110, 123, 143, 194,
                 389, 443, 993, 3306, 3389, 5222, 6667, 8060, 8333]
        services = [{'number': port, 'status': False} for port in ports]

        for service in services:
            opened = sock.ping_check(domain, service['number'])
            service['status'] = opened
            Log.d("{} port is {}".format(service['number'],
                                         'open' if opened else 'closed'))

        del sock

        return services
Example #12
    def extract(self, merge, frame, add_timeline):
        """Extract frames from database."""
        Log.debug("Extracting videos from database...")

        if frame:
            with sqlite3.connect(self.database) as con:
                cur = con.cursor()
                cur.execute("SELECT frame_time, gop_start_rowid, sps_bytes, pps_bytes, frame_bytes, chunk_complete FROM frame_raw_data_table")
                rows = cur.fetchall()
            sps_bytes = None
            pps_bytes = None
            videobuf = None
            count = 0

            timestamps_by_video = {}
            frames_by_video = {}

            for row in rows:
                frame_time, gop_start_rowid, _sps_bytes, _pps_bytes, frame_bytes, chunk_complete = row

                if gop_start_rowid == -1:
                    # set new sps and pps bytes
                    sps_bytes = _sps_bytes
                    pps_bytes = _pps_bytes
                    videobuf = pps_bytes + sps_bytes + frame_bytes
                    timestamps_by_video[count] = [frame_time]
                else:
                    videobuf = videobuf + frame_bytes
                    timestamps_by_video[count].append(frame_time)

                if chunk_complete == 1:
                    frames_by_video[count] = videobuf
                    sps_bytes = None
                    pps_bytes = None
                    videobuf = None
                    count += 1

            if videobuf:
                frames_by_video[count] = videobuf

            for key, buf in frames_by_video.items():
                # save the h264 stream for this video
                with open(os.path.join(self.output, f'{key}.h264'), 'wb') as f:
                    f.write(buf)

                for i, timestamp in enumerate(timestamps_by_video[key]):
                    os.system(f'ffmpeg -i {self.output}/{key}.h264 -c:v libx264 -filter:v "select=gte(n\,{i})" -frames:v 1 -f h264 {self.output}/{key}_{i}.h264')
                    os.system(f'ffmpeg -i {self.output}/{key}_{i}.h264 -frames:v 1 -f image2 {self.output}/{self._gen_filename(timestamp)}.png')
                    os.remove(f'{self.output}/{key}_{i}.h264')

                os.remove(f'{self.output}/{key}.h264')
            Log.info(f"Successfully saved image by frame.")

        else:
            with sqlite3.connect(self.database) as con:
                cur = con.cursor()
                cur.execute("SELECT * FROM frame_raw_data_table")
                rows = cur.fetchall()

            videobuf = ""  # temporary buffer for constructing video
            videoname = ""  # name of video file
            count = 0  # video file counter

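            # assumed row layout (mirrors the SELECT in the frame branch):
            # row[0] = frame_time, row[4] = sps_bytes, row[5] = pps_bytes,
            # row[6] = frame_bytes; a new video starts when sps bytes are set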
            for row in rows:
                if row[4]:
                    if videoname:
                        with open(videoname, "wb") as f:
                            f.write(videobuf)
                        self.rawvideos.append(videoname)

                    videobuf = row[5]
                    videobuf += row[4]
                    videobuf += row[6]

                    videoname = os.path.join(self.output, f"{count}.tmp")
                    self.videotimes[videoname] = [row[0]]

                    count += 1
                else:
                    videobuf = videobuf + row[6]

                    if row[0] not in self.videotimes[videoname]:
                        self.videotimes[videoname].append(row[0])

            if videobuf:
                with open(videoname, "wb") as f:
                    f.write(videobuf)
                self.rawvideos.append(videoname)

            Log.info(f"Successfully extrated {count} video files.")

            self.save(merge)

            documents = []

            for filename in self.videotimes.keys():
                runtime = self.videotimes[filename]
                start, end = to_datetime(runtime[0]), to_datetime(runtime[-1])
                filename = os.path.basename(filename).replace('tmp', 'mp4')

                documents.append({
                    'start_time': start,
                    'end_time': end,
                    'filename': filename
                })

            # write history as file
            with open(os.path.join(self.output, 'video_list.txt'), 'w') as f:
                for document in documents:
                    f.write(f"{document['filename']}: {document['start_time']} - {document['end_time']}\n")

            # upload to elasticsearch to build a timeline
            if add_timeline:
                with Elastic(index='nest', doc_type='video') as elastic:
                    elastic.upload(documents, 'start_time')
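
The to_datetime helper used above is not part of this listing. A minimal sketch, assuming the stored frame_time values are unix timestamps in seconds; the unit is an assumption about the database contents.

from datetime import datetime, timezone

def to_datetime(timestamp):
    # unix-epoch-seconds interpretation is an assumption
    return datetime.fromtimestamp(float(timestamp), tz=timezone.utc)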
Example #13
    def __del__(self):
        # 'del self' only unbinds the local name, so it is omitted here
        Log.i("Ending crawler")
Example #14
def test_write_warning():
    Log.w("Test Warning Message")
Example #15
def test_write_error():
    Log.e("Test Error Message")
Example #16
def test_write_info():
    Log.i("Test Info Message")
Example #17
    def save(self, documents):
        with Elastic(index='alexa', doc_type='activity') as elastic:
            elastic.upload(documents, 'time')

        Log.info("Successfully uploaded data into elasticsearch.")
Example #18
def test_write_critical():
    Log.c("Test Critical Message")