Example no. 1
    def setUp(self):
        self.file = 'file.txt'
        self.data = [
            '4 3', '<tag1 value = "HelloWorld">', '<tag2 name = "Name1">',
            '</tag2>', '</tag1>', 'tag1.tag2~name', 'tag1~name', 'tag1~value'
        ]
        self.number_of_tags = 4
        self.number_of_queries = 3

        self.tags = [
            '<tag1 value = "HelloWorld">', '<tag2 name = "Name1">', '</tag2>',
            '</tag1>'
        ]
        self.tag1_parsed = {'tag1~value': 'HelloWorld'}
        self.tag1_name = 'tag1'

        self.tags_all_parsed = {
            'tag1~value': 'HelloWorld',
            'tag1.tag2~name': 'Name1',
            'tag2~name': 'Name1'
        }

        self.queries = ['tag1.tag2~name', 'tag1~name', 'tag1~value']
        self.queries_answers = ['Name1', 'Not Found!', 'HelloWorld']

        self.Parser = Parser(self.file)
Example no. 2
def main():
    """
    Main entry point: opens and closes the input/output files
    and checks whether each word belongs to the language
    defined by the automaton.
    """
    in_file_name = "input.txt"
    out_file_name = "output.txt"

    in_file = open(in_file_name, 'r')

    out_file = open(out_file_name, 'w')
    out_file.write('IlgizZamaleev' + '\n')

    problems = int(in_file.readline())

    for i in range(problems):
        automato = Parser().parse_automata(in_file)

        tests = int(in_file.readline())

        out_file.write(str(i + 1) + '\n')

        for j in range(tests):
            word = in_file.readline()[:-1]
            out_file.write(automato.check_word(word) + '\n')

    in_file.close()
    out_file.close()
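
For reference, the input.txt layout implied by the reads above (a hedged reconstruction; the automaton block consumed by parse_automata is opaque from this snippet):

# input.txt (sketch):
#   <number of problems>
#   then, per problem:
#     <automaton description>   <- format defined by Parser.parse_automata
#     <number of test words>
#     <one test word per line>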
Example no. 3
    def read(img: Union[str, PicHandler], path: str='D:\\Project\\') -> ElemBlock:
        if isinstance(img, str):
            ph = PicHandler(img, path=path)
        else:
            ph = img
        blocks = Parser.parse(ph.blocksOfPixels(), sensivity=4, merge='math')
        Parser.swap_coordinates(blocks)
        elemBlocks = [FormulaRecognizer.recBlock(block) for block in blocks]

        try:
            f = Formula(elemBlocks)
        except Exception:
            return ElemBlock('', Position(0, 0, 0, 0), ok=False)
        print(f.texCode)
        #ph._show()
        return f.getFormula()
Example no. 4
def translate(file):
    writer = CodeWriter(file)
    if '.vm' in file:
        parser = Parser(file)
    else:
        if os.getcwd().split('/')[-1] != file:
            os.chdir(file)
        for fileName in glob.glob('*.vm'):
            if fileName != const.SYS:
                writer.setFileName(fileName)
                parser = Parser(fileName)
                parse(parser, writer)
        writer.setFileName(const.SYS)
        parser = Parser(const.SYS)
    parse(parser, writer)
    writer.close()
Example no. 5
def main():
    parser = Parser(HTML.REDIS, PARSER.ELASTICSEARCH)
    """Crawler start"""
    crawler = Crawler(DB.MYSQL, HTML.REDIS, parser)

    try:
        crawler.run()
    except KeyboardInterrupt:
        crawler.stop()
        sys.exit(0)
Example no. 6
def inline_place(bot, update):
    query = update.inline_query.query
    help_text = ('Scrivi il nome di una città per cercare solo gli eventi in quella città.\n'
                 'Puoi anche cercare in una intera regione!\n'
                 'Oppure puoi cercare nei dintorni di una città scrivendo "dintorni" dopo il nome della città')
    if not query:
        return
    else:
        if query == 'help':
            results = [
                InlineQueryResultArticle(
                    id=uuid4(),
                    title=help_text,
                    input_message_content=InputTextMessageContent(
                        "Un attimo, sto usando @cercaEventi_bot"))
            ]
            update.inline_query.answer(results)
            return
        p = Parser(query)
        events = p.getEvents()
        if len(events) > 0:
            results = [
                InlineQueryResultArticle(
                    id=uuid4(),
                    title=event['name'],
                    input_message_content=InputTextMessageContent(
                        "{}\n<b>{}</b>\n<b>{}</b>\n{}".format(
                            event['name'], event['date'], event['place'],
                            event['link']),
                        parse_mode='HTML'),
                    thumb_url=event['img'],
                    description="{}\n{}".format(event['date'], event['place']))
                for event in events
            ]
        else:
            results = [
                InlineQueryResultArticle(
                    id=uuid4(),
                    title='Nessun evento trovato per {}'.format(query),
                    input_message_content=InputTextMessageContent(
                        'Nessun evento trovato per {}'.format(query)))
            ]
        update.inline_query.answer(results)
Example no. 7
 def __init__(self, version):
     self.split_version = Parser().parse(version)
Example no. 8
from grammar import Grammar
from my_parser import Parser


def display_options():
    print("1 Display Terminals")
    print("2 Display Non-terminals")
    print("3 Display Productions")
    print("4 Choose production to do closure")
    print("5 Choose symbol to do goto on state(ClosureLR of S'->S)")
    print("6 Col Can")


if __name__ == '__main__':
    g: Grammar = Grammar('g1.in')
    p = Parser(g)
    while True:
        display_options()
        i = int(input())
        if i == 1:
            print(p.get_e())
        elif i == 2:
            print(p.get_n())
        elif i == 3:
            print(p.get_p())
        elif i == 4:
            __user_input = input("Give input:")
            print(p.closure_lr(__user_input))
        elif i == 5:
            __user_input = input("Give input:")
            result = p.go_to_lr(p.closure_lr("S'->.S"), __user_input)
            print(result)
Example no. 9
        valid_loss, valid_acc = eval_net(args, model, validloader, criterion)
        vbar.set_description(
            'valid set - average loss: {:.4f}, accuracy: {:.0f}%'.format(
                valid_loss, 100. * valid_acc))

        if args.filename:
            with open(args.filename, 'a') as f:
                f.write('%s %s %s %s' %
                        (args.dataset, args.learn_eps,
                         args.neighbor_pooling_type, args.graph_pooling_type))
                f.write("\n")
                f.write("%f %f %f %f" %
                        (train_loss, train_acc, valid_loss, valid_acc))
                f.write("\n")

        lrbar.set_description("Learning eps with learn_eps={}: {}".format(
            args.learn_eps,
            [layer.eps.data.item() for layer in model.ginlayers]))

    tbar.close()
    vbar.close()
    lrbar.close()


if __name__ == '__main__':
    args = Parser(description='GIN').args
    print('show all arguments configuration...')
    print(args)

    main(args)
Example no. 10
from lexer import Lexer
from my_parser import Parser
from codegen import CodeGen

if __name__ == '__main__':
    fname = "input.hel"
    with open(fname) as f:
        text_input = f.read()

    lexer = Lexer().get_lexer()
    tokens = lexer.lex(text_input)

    codegen = CodeGen()

    module = codegen.module
    builder = codegen.builder
    printf = codegen.printf

    pg = Parser(module, builder, printf)
    pg.parse()
    parser = pg.get_parser()
    parser.parse(tokens).eval()

    codegen.create_ir()
    codegen.save_ir("output.hll")
Example no. 11
 def setUp(self):
     self.sut = Crawler(DB.MYSQL, HTML.REDIS,
                        Parser(HTML.REDIS, PARSER.ELASTICSEARCH))
Example no. 12
    def visit_collectionaccess(self, node):
        collection = self.search_scopes(node.collection.value)
        key = self.visit(node.key)
        if not key:
            key = node.key.value
        return collection[key]

    def interpret(self, tree):
        return self.visit(tree)

    def visit_print(self, node):
        print(self.visit(node.value))


if __name__ == '__main__':
    from my_lexer import Lexer
    from my_parser import Parser
    from my_preprocessor import Preprocessor
    file = 'test.my'
    with open(file) as f:
        code = f.read()
    lexer = Lexer(code, file)
    parser = Parser(lexer)
    t = parser.parse()
    symtab_builder = Preprocessor(parser.file_name)
    symtab_builder.check(t)
    if not symtab_builder.warnings:
        interpreter = Interpreter(parser.file_name)
        interpreter.interpret(t)
    else:
        print('Did not run')
Example no. 13
class TestParser(unittest.TestCase):
    def setUp(self):
        self.file = 'file.txt'
        self.data = [
            '4 3', '<tag1 value = "HelloWorld">', '<tag2 name = "Name1">',
            '</tag2>', '</tag1>', 'tag1.tag2~name', 'tag1~name', 'tag1~value'
        ]
        self.number_of_tags = 4
        self.number_of_queries = 3

        self.tags = [
            '<tag1 value = "HelloWorld">', '<tag2 name = "Name1">', '</tag2>',
            '</tag1>'
        ]
        self.tag1_parsed = {'tag1~value': 'HelloWorld'}
        self.tag1_name = 'tag1'

        self.tags_all_parsed = {
            'tag1~value': 'HelloWorld',
            'tag1.tag2~name': 'Name1',
            'tag2~name': 'Name1'
        }

        self.queries = ['tag1.tag2~name', 'tag1~name', 'tag1~value']
        self.queries_answers = ['Name1', 'Not Found!', 'HelloWorld']

        self.Parser = Parser(self.file)

    def test_get_data_from_file(self):
        self.assertEqual(self.data, self.Parser.data)

    def test_get_number_of_tags(self):
        self.assertEqual(self.number_of_tags, self.Parser.number_of_tags)

    def test_get_tags(self):
        self.assertEqual(self.data[1:5], self.Parser.all_tags)

    def test_get_queries(self):
        self.assertEqual(self.data[5:], self.Parser.queries_in_list)

    def test_get_variables_from_tags(self):
        self.assertEqual(self.tags_all_parsed,
                         self.Parser.get_variables_from_tags())

    def test_get_not_nested_tags_names_and_variables(self):
        self.assertEqual(
            self.tag1_parsed,
            self.Parser.get_not_nested_tags_names_and_variables(0))

    def test_get_nested_tags_names_and_variables(self):
        self.assertEqual(self.tags_all_parsed,
                         self.Parser.get_nested_tags_names_and_variables(0))

    def test_get_current_tag_name_as_string(self):
        self.assertEqual(
            self.tag1_name,
            self.Parser.get_current_tag_name_as_string(self.tags[0]))

    def test_get_tag_variables_with_values(self):
        self.assertEqual(
            self.tag1_parsed,
            self.Parser.get_tag_variables_with_values(self.tags[0]))

    def test_is_opening_tag(self):
        self.assertTrue(self.Parser.is_opening_tag(self.tags[0]))

    def test_check_if_queries_exist_in_tags(self):
        self.assertEqual(self.queries_answers,
                         self.Parser.check_if_queries_exist_in_tags())
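
For context, a minimal sketch of a Parser these assertions would accept (a reconstruction from the tests alone, not the project's actual code; the two index-based helpers are omitted, and the method and attribute names are taken from the assertions):

import re


class Parser:
    """Hedged sketch of the tag/query parser the tests above assume."""

    def __init__(self, file):
        # First line holds the tag and query counts, e.g. '4 3'.
        with open(file) as f:
            self.data = [line.rstrip('\n') for line in f]
        self.number_of_tags, self.number_of_queries = (
            int(n) for n in self.data[0].split())
        self.all_tags = self.data[1:1 + self.number_of_tags]
        self.queries_in_list = self.data[1 + self.number_of_tags:]

    def is_opening_tag(self, tag):
        return not tag.startswith('</')

    def get_current_tag_name_as_string(self, tag):
        return re.match(r'</?(\w+)', tag).group(1)

    def get_tag_variables_with_values(self, tag):
        # '<tag1 value = "HelloWorld">' -> {'tag1~value': 'HelloWorld'}
        name = self.get_current_tag_name_as_string(tag)
        return {'{}~{}'.format(name, attr): value
                for attr, value in re.findall(r'(\w+)\s*=\s*"([^"]*)"', tag)}

    def get_variables_from_tags(self):
        # Track the open-tag stack so nested tags also get a dotted key,
        # e.g. 'tag1.tag2~name' alongside 'tag2~name'.
        parsed, stack = {}, []
        for tag in self.all_tags:
            if self.is_opening_tag(tag):
                stack.append(self.get_current_tag_name_as_string(tag))
                prefix = '.'.join(stack)
                for key, value in self.get_tag_variables_with_values(tag).items():
                    parsed[key] = value
                    parsed['{}~{}'.format(prefix, key.split('~')[1])] = value
            else:
                stack.pop()
        return parsed

    def check_if_queries_exist_in_tags(self):
        parsed = self.get_variables_from_tags()
        return [parsed.get(q, 'Not Found!') for q in self.queries_in_list]

Run against the setUp data above, this sketch yields the expected tags_all_parsed dict and the ['Name1', 'Not Found!', 'HelloWorld'] answers.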
Example no. 14
from my_parser import Parser


def fuel_process(fuel):
    tmp_fuel = 0
    # Divide by 3
    tmp_fuel = fuel / 3
    # Round down
    tmp_fuel = int(tmp_fuel)
    # Subtract 2
    tmp_fuel = tmp_fuel - 2
    return tmp_fuel
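
# Sanity-check of fuel_process (an illustrative, hedged self-test; the
# expected values follow directly from int(fuel / 3) - 2):
assert fuel_process(12) == 2      # 12 // 3 - 2 = 2
assert fuel_process(1969) == 654  # 1969 // 3 - 2 = 654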


# Read file
my_parser = Parser("./input.txt")

fuel_sum = 0
fuel_down = 0
fuel_calc = 0

# Process each module mass to find the fuel required
for x in my_parser.sequence:
    # Calculate the fuel needed for the module
    fuel_calc = fuel_process(x)
    fuel_sum += fuel_calc
    fuel_down = fuel_calc
    # Calculate the fuel needed by the fuel itself
    while fuel_down > 0:
        fuel_calc = fuel_process(fuel_calc)
        if fuel_calc > 0:
            fuel_sum += fuel_calc
        fuel_down = fuel_calc
Example no. 15
def translate(file):
    parser = Parser(file)
    codeWriter = CodeWriter(file)
    while parser.hasMoreCommands():
        if parser.commandType() == const.ARITHMETIC:
            codeWriter.writeArithmetic(parser.command(), parser.vmCommand())
        if parser.commandType() in [const.PUSH, const.POP]:
            codeWriter.writePushPop(parser.commandType(), parser.arg1(),
                                    parser.arg2(), parser.vmCommand())
        parser.advance()
    codeWriter.close()
Example no. 16
"""
Interface module that orchestrates the program's processing stages.
"""
import pickle
from typing import Dict, List, Union

import configs as c
from my_parser import Parser
from preprocess import Preprocessing as pp
from spimi import GetIndex
from rank import RankList
from boolean_search import BooleanSearch as bs

# trigger that runs the corpus parser
if c.CORPUS_PARSER:
    p = Parser()
    p.run()
# trigger that builds the inverted index
if c.BUILD_INDEX:
    idx = GetIndex()
    idx.save_block_index()
    idx.combine_block_index()
# read the index and the hash with extra document info from disk
with open('index/full_index.pickle', 'rb') as f:
    index: Dict[int, List] = pickle.load(f)
with open('index/doc_id_doc_name_dict.pickle', 'rb') as f:
    doc_id_doc_name_dict: Dict = pickle.load(f)

while True:
    print('\n\nInput your boolean query:')
    query_string: str = input('>')
Example no. 17
class ES:
    def __init__(self):
        self.stop_list = self.read_stop()
        self.template = {
            "index_patterns": "hw3",
            "settings": {
                "number_of_replicas": 1,
                "index.highlight.max_analyzed_offset": 2000000,
                "analysis": {
                    "filter": {
                        "english_stop": {
                            "type": "stop",
                            "stopwords": self.stop_list,
                        },
                        "my_snow": {
                            "type": "snowball",
                            "language": "English"
                        }
                    },
                    "analyzer": {
                        # custom analyzer "stopped"
                        "stopped_stem": {
                            "type":
                            "custom",
                            "tokenizer":
                            "standard",
                            "filter": [
                                "lowercase",
                                # custom filter "english_stop"
                                "english_stop",
                                "my_snow"
                            ]
                        }
                    }
                }
            },
            "mappings": {
                "_source": {
                    "enabled": "true"
                },
                "properties": {
                    # fields
                    "http_header": {
                        "type": "keyword"
                    },
                    "title": {
                        "type": "keyword"
                    },
                    "text_content": {
                        "type": "text",
                        "fielddata": "true",
                        "analyzer": "stopped_stem",
                        "index_options": "positions",
                    },
                    "raw_html": {
                        "type": "text",
                        "index": "false"
                    },
                    "in_links": {
                        "type": "keyword"
                    },
                    "out_links": {
                        "type": "keyword"
                    }
                }
            }
        }
        self.hosts = [
            "https://f2ff43d409574698a747eaa43256d1e0.northamerica-northeast1.gcp.elastic-cloud.com:9243/"
        ]
        self.cloud_id = "CS6200:bm9ydGhhbWVyaWNhLW5vcnRoZWFzdDEuZ2NwLmVsYXN0aWMtY2xvdWQuY29tJGYyZmY0M2Q0MDk1NzQ2OThhNzQ3ZWFhNDMyNTZkMWUwJDU1ZTY4MGVhZjQ5MjRmNmM5NmY5YmIxNTRjZTQyN2Fk"
        self.name = "web_crawler"
        self.index = "hw3"
        self.es = Elasticsearch(hosts=self.hosts,
                                timeout=60,
                                cloud_id=self.cloud_id,
                                http_auth=('elastic',
                                           'nRGUXlzD1f8kOT63iLehSG9a'))
        self.parser = Parser()

    def initialize(self):
        self.read_stop()
        # self.es.indices.delete(index=self.index)
        self.es.indices.put_template(name=self.name, body=self.template)
        self.es.indices.create(index=self.index)

    def es_control(self):
        self.parser.initialize("./output/")
        # upload docs, headers
        docs, headers = self.parser.doc_parse()
        actions = [{
            "_op_type": "update",
            "_index": self.index,
            "_id": id,
            "doc": {
                "http_header": str(headers[id]),
                "title": "",
                "text_content": docs[id],
                "raw_html": "",
                "in_links": "",
                "out_links": ""
            },
            "doc_as_upsert": "true"
        } for id in docs]
        helpers.bulk(self.es, actions=actions)
        docs, headers = None, None

        # upload title
        titles = self.parser.title_parse()
        actions = [{
            "_op_type": "update",
            "_index": self.index,
            "_id": id,
            "script": {
                "source": """
                                if (ctx._source["title"] == "") {
                                    ctx._source["title"] = params["title"]
                                }
                            """,
                "lang": "painless",
                "params": {
                    "title": titles[id]
                }
            }
        } for id in titles]
        helpers.bulk(self.es, actions=actions)
        titles = None

        # upload html
        for i in range(20):
            raw_html = self.parser.html_parse(2000 * i, 2000 * (i + 1))
            actions = [{
                "_op_type": "update",
                "_index": self.index,
                "_id": id,
                "script": {
                    "source": """
                            if (ctx._source["raw_html"] == "") {
                                ctx._source["raw_html"] = params["html"]
                            }
                        """,
                    "lang": "painless",
                    "params": {
                        "html": raw_html[id]
                    }
                }
            } for id in raw_html]
            helpers.bulk(self.es, actions=actions)
            raw_html = None

        # upload in_links, out_links
        in_links, out_links = self.parser.links_parse()
        # with open("./output/test_in_links.json", "r") as f:
        #     in_links = json.load(f)
        # with open("./output/test_out_links.json", "r") as f:
        #     out_links = json.load(f)
        actions = [{
            "_op_type": "update",
            "_index": self.index,
            "_id": id,
            "script": {
                "source": """
                                if (ctx._source["in_links"] == "") {
                                    ctx._source["in_links"] = params["in_links"]
                                } else {
                                    for (int i = 0; i < params["length"]; ++i) {
                                        if (ctx._source["in_links"].contains(params["in_links"][i]) == false) {
                                            ctx._source["in_links"].add(params["in_links"][i])
                                        }
                                    }
                                }
                            """,
                "lang": "painless",
                "params": {
                    "in_links": in_links[id],
                    "length": len(in_links[id])
                }
            }
        } for id in in_links]
        helpers.bulk(self.es, actions=actions)

        actions = [{
            "_op_type": "update",
            "_index": self.index,
            "_id": id,
            "script": {
                "source": """
                        if (ctx._source["out_links"] == "") {
                            ctx._source["out_links"] = params["out_links"]
                        } else {
                            for (int i = 0; i < params["length"]; ++i) {
                                if (ctx._source["out_links"].contains(params["out_links"][i]) == false) {
                                    ctx._source["out_links"].add(params["out_links"][i])
                                }
                            }
                        }
                    """,
                "lang": "painless",
                "params": {
                    "out_links": out_links[id],
                    "length": len(out_links[id])
                }
            }
        } for id in out_links]
        helpers.bulk(self.es, actions=actions)

    def read_stop(self):
        stop_list = []
        with open("E:/Will/work/NEU/CS 6200/Python Project/stoplist.txt",
                  "r") as f:
            for line in f.readlines():
                stop_list.append(line.replace("\n", ""))
        return stop_list
Example no. 18
 def __init__(self):
     self.stop_list = self.read_stop()
     self.template = {
         "index_patterns": "hw3",
         "settings": {
             "number_of_replicas": 1,
             "index.highlight.max_analyzed_offset": 2000000,
             "analysis": {
                 "filter": {
                     "english_stop": {
                         "type": "stop",
                         "stopwords": self.stop_list,
                     },
                     "my_snow": {
                         "type": "snowball",
                         "language": "English"
                     }
                 },
                 "analyzer": {
                     # custom analyzer "stopped"
                     "stopped_stem": {
                         "type":
                         "custom",
                         "tokenizer":
                         "standard",
                         "filter": [
                             "lowercase",
                             # custom filter "english_stop"
                             "english_stop",
                             "my_snow"
                         ]
                     }
                 }
             }
         },
         "mappings": {
             "_source": {
                 "enabled": "true"
             },
             "properties": {
                 # fields
                 "http_header": {
                     "type": "keyword"
                 },
                 "title": {
                     "type": "keyword"
                 },
                 "text_content": {
                     "type": "text",
                     "fielddata": "true",
                     "analyzer": "stopped_stem",
                     "index_options": "positions",
                 },
                 "raw_html": {
                     "type": "text",
                     "index": "false"
                 },
                 "in_links": {
                     "type": "keyword"
                 },
                 "out_links": {
                     "type": "keyword"
                 }
             }
         }
     }
     self.hosts = [
         "https://f2ff43d409574698a747eaa43256d1e0.northamerica-northeast1.gcp.elastic-cloud.com:9243/"
     ]
     self.cloud_id = "CS6200:bm9ydGhhbWVyaWNhLW5vcnRoZWFzdDEuZ2NwLmVsYXN0aWMtY2xvdWQuY29tJGYyZmY0M2Q0MDk1NzQ2OThhNzQ3ZWFhNDMyNTZkMWUwJDU1ZTY4MGVhZjQ5MjRmNmM5NmY5YmIxNTRjZTQyN2Fk"
     self.name = "web_crawler"
     self.index = "hw3"
     self.es = Elasticsearch(hosts=self.hosts,
                             timeout=60,
                              cloud_id=self.cloud_id,
                             http_auth=('elastic',
                                        'nRGUXlzD1f8kOT63iLehSG9a'))
     self.parser = Parser()
Example no. 19
from flask import Flask, json, request

from my_parser import Parser
from sql_manager import SQL_Manager

app = Flask(__name__)
parser = Parser()
manager = SQL_Manager()


@app.route('/')
def main():
    return 'OK'


@app.route('/imports', methods=['POST'])
def import_data():
    '''Retrieves data from request and adds it to the database.

    Returns:
        import_id & 201-status_code: data was imported
        message & 404/400-status_code: import failed
    '''

    data = json.loads(request.data)
    check = parser.check(data)

    if check is not True:
        return check

    return manager.import_data(data)
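
A hypothetical client call against the endpoint above (only the route, the method, and the fact that the body is JSON come from the example; the host, port, and payload keys are assumptions):

import requests

# POST a JSON document to /imports; the payload shape is illustrative only.
response = requests.post('http://localhost:5000/imports',
                         json={'citizens': []})
print(response.status_code, response.text)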
Example no. 20
File: main.py  Project: petoc96/sj
def main():
    with io.open("input_correct.txt", 'r') as fin:
        text = fin.read()
    lexer = Lexer(text)
    parser = Parser(lexer)
    parser.parse()