Example #1
    def start_optimization(index):
        """
        Optimize the elasticsearch settings to speed up data upload
        :param index: elasticsearch index
        :return:
        """
        print("优化任务开启.........")
        config = ConfigParser.ConfigParser()
        config.read(
            os.path.join(os.path.dirname(os.path.dirname(__file__)),
                         'config/config.ini'))
        max_bytes_per_sec = config.get("Optimization_Config",
                                       "max_bytes_per_sec")
        Query().setting(
            index=index,
            data=
            '"persistent" : {{"indices.store.throttle.max_bytes_per_sec" : "{0}mb"}}'
            .format(max_bytes_per_sec))

        Query().setting(
            index=index,
            data='"transient" : {"indices.store.throttle.type" : "none" }')

        Query().setting(index=index,
                        data='{"index": {"refresh_interval": "-1"}}')
Example #2
 def path_list_changed(self):
     """Update runnable list."""
     with QMutexLocker(self.mutex):
         runnables = []
         for order, path in enumerate(self.paths):
             name, _ = os.path.splitext(os.path.basename(path))
             if name in self.d:
                 runnables.append((name,
                                   Runnable(name=name,
                                            path=path,
                                            query=self.d[name].query,
                                            worker=self.worker,
                                            order=order)))
             else:
                 runnables.append(
                     (name,
                      Runnable(name=name,
                               path=path,
                               query=Query(text='',
                                           insertion_cost=1,
                                           first_insertion_cost=50,
                                           prepend_first_insertion_cost=5,
                                           append_first_insertion_cost=10,
                                           deletion_cost=100,
                                           substitution_cost=100,
                                           transposition_cost=10),
                               worker=self.worker,
                               order=order)))
         self.d = dict(runnables)
Example #3
def task_end():
    config = ConfigParser.ConfigParser()
    config.read(os.path.join(os.path.dirname(__file__), 'config/task.ini'))
    elasticsearch_id = config.get("elasticsearch_id", 'id')
    for id in elasticsearch_id.split(','):
        Query().delete(index=id)
    os.remove("config/task.ini")
Example #4
 def sql_analysis(cls):
     query_data = BaseFunction.join_query(keyword="URI",
                                          rule_name="sql_analysis")
     result = Query().query(None, data=query_data)
     BaseFunction.result_dispose(
         result,
         index=None,
         query=query_data,
         none_message="========SQL注入分析未检测到威胁========")
Example #5
 def web_command_attack_analysis(cls):
     query_data = BaseFunction.join_query(keyword="URI",
                                          rule_name="common_web_analysis")
     result = Query().query(None, data=query_data)
     BaseFunction.result_dispose(
         result,
         index=None,
         query=query_data,
         none_message="========Web 通用攻击分析未检测到威胁========")
Example #6
 def http_method_analysis(cls):
     query_data = BaseFunction.join_query(keyword="method",
                                          rule_name="http_method_analysis")
     result = Query().query(index=None, data=query_data)
     BaseFunction.result_dispose(
         result,
         index=None,
         query=query_data,
         none_message="========http method 分析未检测到威胁(无不安全的http请求方法)========")
Example #7
    def restore_settings(index):
        """
        Restore the elasticsearch setting changes so that data analysis queries run normally
        :param index: elasticsearch index
        :return:
        """

        print("还原配置.........")
        Query().setting(
            index=index,
            data=
            '"persistent" : {"indices.store.throttle.max_bytes_per_sec" : "20mb"}'
        )

        Query().setting(
            index=index,
            data='"transient" : {"indices.store.throttle.type" : "merge" }')

        Query().setting(index=index,
                        data='{"index": {"refresh_interval": "1s"}}')
Example #8
    def backup_file_analysis(self, index):
        """
        Backup file detection
        :param index: the id in elasticsearch
        :return:
        """

        # read the rule from the config.ini configuration file
        config = ConfigParser.ConfigParser()
        config.read('config/config.ini')
        rule = config.get('backup_file_analysis', 'rule')

        elastic = Query(ip="127.0.0.1", port=9200)
        result = elastic.query(index=index, data=rule)

        BaseFunction.result_dispose(
            result,
            index=index,
            query=rule,
            none_message="========备份文件分析未检测到威胁========")
Example #9
 def _refresh_tasks(self, hwnds, query=None):
     for hwnd in hwnds:
         if hwnd not in self.tasks:
             self.tasks[hwnd] = Task(
                 hwnd=hwnd,
                 usetime=datetime.now(),
                 query=Query(text='' if query is None else query,
                             insertion_cost=1,
                             first_insertion_cost=50,
                             prepend_first_insertion_cost=5,
                             append_first_insertion_cost=10,
                             deletion_cost=100,
                             substitution_cost=100,
                             transposition_cost=10))
         elif query is not None:
             self.tasks[hwnd].query.update(query.lower())
Example #10
def read_log_file():
    id = ""  # elasticsearch id列表
    config = ConfigParser.ConfigParser()
    config.add_section("elasticsearch_id")

    for root, path, files in os.walk('log'):
        for file in files:
            id = id + file.split('-')[2].split('.')[0] + ","
            Query().put(index=file.split('-')[2].split('.')[0],
                        data="")  #创建elasticsearch id
            upload_start(index=file.split('-')[2].split('.')[0])
            line_number = 0
            size = 0
            with gzip.open(os.path.join(root, file), 'r') as f:
                while f.readline() != "":
                    line_number += 1

            print("【!】文件读取成功........")

            with gzip.open(os.path.join(root, file), 'r') as f:
                logformat = []
                logline = f.readline()
                print("【+】{} 开始上传........".format(file))
                while logline != "":
                    for i in xrange(100000):
                        if logline != "":
                            loglinemanage = format.LogClass(log=logline)
                            logformat.append(loglinemanage.formatting())
                            logline = f.readline()
                    elasticE = elasticEngine.elasticManage()
                    elasticE.saveMessage(logformat,
                                         file.split('-')[2].split('.')[0])
                    logformat = []

                    size = size + 100000
                    view_bar(size, line_number)
            upload_stop(index=file.split('-')[2].split('.')[0])
            time.sleep(5)
            print("->")

        print("【!】写入配置文件........")
        config.set('elasticsearch_id', 'id', id.rstrip(','))
        config.write(open('config/task.ini', 'w'))
        print("【!】配置文件写入成功........【task.ini】")
Example #11
    def received_message(self, message):
        """ Callback method for received messages.

        Implements the behaviour of the listeners. First, we extract important values from the message, such as the
        Attributes dict, client id, command and filename, if proceeds.
        If the client is not in our list of clients, we don't process the message if it's an echo request (client is in
        the middle of a chat) message. Otherwise, we fulfill the client query, polling a new query so the listener which
        implements our interface can act in consequence, and remove the message from the queue.
        :param message: Received message.
        """
        attributes = message['MessageAttributes']
        client_id = attributes['Author']['StringValue']
        command = attributes['Command']['StringValue']
        filename = client_id + '.json'
        if client_id not in self._clients.keys():
            logging.info('Detected command ' + command + ' from client ' +
                         client_id)
            if command == Command.ECHO_REQUEST.value or command == Command.CLIENT_END.value:
                self._sqs_manager.change_visibility_timeout(
                    self._sqs_manager.get_queue_url(self.inbox_queue_name),
                    message)
            else:
                if command == Command.NEW_CLIENT.value:
                    self._queries.append(
                        Query(client_id, filename, Command.NEW_CLIENT))
                elif command == Command.BEGIN_ECHO.value:
                    self._queries.append(
                        Query(client_id, filename, Command.BEGIN_ECHO))
                elif command == Command.REMOVE_CLIENT.value:
                    self._queries.append(
                        Query(client_id, filename, Command.REMOVE_CLIENT))
                elif command == Command.DOWNLOAD_REQUEST.value:
                    self._queries.append(
                        Query(client_id, filename, Command.DOWNLOAD_REQUEST))
                elif command == Command.DOWNLOAD_URL_REQUEST.value:
                    self._queries.append(
                        Query(client_id, filename,
                              Command.DOWNLOAD_URL_REQUEST))
                self._sqs_manager.delete_message(
                    message,
                    self._sqs_manager.get_queue_url(self.inbox_queue_name))
        elif client_id in self._clients.keys():
            message_body = message['Body']
            if command == Command.CLIENT_END.value:
                self._queries.append(
                    Query(client_id, filename, Command.CLIENT_END))
                messages = ''
                for each_message in self._messages[client_id]:
                    messages += each_message + ', '
                logging.info('Client ' + client_id +
                             ' left echo-chat. Messages to be updated to S3: ' +
                             messages)
            elif command == Command.ECHO_REQUEST.value:
                logging.info('Echoing : ' + message_body)
                logging.info('Echoing a message with command : ' + command)
                self.send_message(message_body, 'echo', client_id)
            else:
                logging.warning('Unexpected command. Client ' +
                                str(client_id) + ' is in client list, cmd: ' +
                                command)

            if client_id in self._messages:
                self._messages[client_id].append(message_body)
            else:
                conversation = []
                conversation.append(message_body)
                self._messages[client_id] = conversation

            self._sqs_manager.delete_message(
                message,
                self._sqs_manager.get_queue_url(self.inbox_queue_name))
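The elif chain above, which turns a command string into a queued Query, could also be written as a lookup table. A sketch using only the Command members already referenced (this fragment is meant to sit inside the same method, so self, client_id and filename come from the surrounding code):

COMMAND_TABLE = {
    Command.NEW_CLIENT.value: Command.NEW_CLIENT,
    Command.BEGIN_ECHO.value: Command.BEGIN_ECHO,
    Command.REMOVE_CLIENT.value: Command.REMOVE_CLIENT,
    Command.DOWNLOAD_REQUEST.value: Command.DOWNLOAD_REQUEST,
    Command.DOWNLOAD_URL_REQUEST.value: Command.DOWNLOAD_URL_REQUEST,
}

if command in COMMAND_TABLE:
    self._queries.append(Query(client_id, filename, COMMAND_TABLE[command]))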
"""
Extracts all Pull Request numbers for the orgs
listed in data/orgs-community-size-2020.json.
Saves the output to data/pull-requests.json
"""

from utils import Query
from utils.API import API
from utils.FileLoader import FileLoader

api = API()
orgs = FileLoader.load('orgs-community-size-2020.json')

pull_requests = []

for org in orgs:
    [keys, query] = Query.getPRsForOrg(org['org'])
    orgPRs = api.asArray(query, keys)
    pull_requests += orgPRs

FileLoader.save('pull-requests.json', pull_requests)
"""
Get the orgs with the largest communities in 2020.
Save the data to data/orgs-community-size-2020.json
"""
from utils import Query
from utils.API import API
from utils.FileLoader import FileLoader

api = API()

[keys, query] = Query.getOrgs()

orgs = api.asArray(query, keys)
FileLoader.save('orgs-community-size-2020.json', orgs)
Example #14
    def xss_analysis(self):
        urls = []
        clf = None
        data = """
        {
          "aggs": {
            "group_by_uri": {
              "terms": {
                "field": "URI.keyword",
                "size": 2147483647
              }
            }
          },
          "size": 0
        }
        """
        result = Query().query(index=None, data=data)  # fetch the deduplicated list of all URIs
        for result_message in result.get("aggregations").get(
                "group_by_uri").get("buckets"):
            urls.append(
                result_message.get('key'))  # collect every deduplicated URI stored in elasticsearch

        with open("ML/xss_ML.pkl", 'rb') as f:
            clf = cPickle.load(f)

        for url in urls:
            result = clf.predict(numpy.mat(self.get_feature(url=url)))
            if result[0] == 1:
                data = """
                {{
                  "query": {{
                    "match": {{
                      "URI": "{0}"
                    }}
                  }},
                  "aggs": {{
                    "group_by_ip_address": {{
                      "terms": {{
                        "field": "ip_address.keyword",
                        "size": 2147483647
                      }}
                    }}
                  }}
                }}
                """.format(url)  #根据URI反查IP地址
                result = Query().query(index=None, data=data)
                buckets = result.get("aggregations").get(
                    "group_by_ip_address").get("buckets")
                if len(buckets) > 20:
                    print(url + "  "),
                    print("反查其IP数为{},可能为误报".format(len(buckets)))
                else:
                    print(url)
                    data = """
                                    {{
                                      "query": {{
                                        "match": {{
                                          "URI": "{0}"
                                        }}
                                      }},
                                      "aggs": {{
                                        "group_by_ip_address": {{
                                          "terms": {{
                                            "field": "ip_address.keyword",
                                            "size": 2147483647
                                          }}
                                        }}
                                      }}
                                    }}
                                    """.format(url)  # 根据URI反查IP地址
                    result = Query().query(index=None, data=data)
                    buckets = result.get("aggregations").get(
                        "group_by_ip_address").get("buckets")
                    print("产生此URL的ip地址为:")
                    for bucket in buckets:
                        print(bucket.get("key"))
Example #15
from collections import defaultdict

from utils import Query
from utils.API import API
from utils.FileLoader import FileLoader

api = API()
pull_requests = FileLoader.load('pull-requests.json')

repos = defaultdict(list)
for pr in pull_requests:
    if not pr['repo_name'] or not pr['number']:
        continue
    repos[pr['repo_name']].append(pr['number'])

comments = []
for i, repo in enumerate(repos):
    print("{:}/{:} ({:.2f}%)".format(i, len(repos), (i / len(repos) * 100)))
    try:
        # Omit the empty instances
        if not repos[repo]:
            continue
        [keys, query] = Query.getCommentsForRepo(repo, repos[repo])
        prComments = api.asArray(query, keys)
        comments += prComments

        if i % 1000 == 0:
            FileLoader.save('comments.json', comments)
    except Exception:
        pass

FileLoader.save('comments.json', comments)
"""
Extracts all organisations that are
listed in orgs-community-size-2020.json and finds
all merged Pull Requests in 2020.
Saves the output to data/pull-requests-status.json
"""

from utils import Query
from utils.API import API
from utils.FileLoader import FileLoader

api = API()
orgs = FileLoader.load('orgs-community-size-2020.json')

pull_requests = []

for org in orgs:
    [keys, query] = Query.getMergedPRsFrOrg(org['org'])
    orgPRs = api.asArray(query, keys)
    pull_requests += orgPRs

FileLoader.save('pull-requests-merged.json', pull_requests)