def ets_internal(table, key):
    schema = utils.load_schema(table)
    if schema is None:
        return None
    lines = []
    _append_ets_internals(lines, table, key, schema)
    return lines
def erl_db_accessor_update_cache_fields_footer(table):
    schema = utils.load_schema(table)
    if schema is None:
        return None
    lines = []
    lines.append(' _ -> none')
    lines.append(' end.')
    return lines
def erl_mod_c2s_to_pb(module, table, pb_type):
    schema = utils.load_schema(table)
    if schema is None:
        return None
    pb_schema = schema['pb_types'][pb_type]
    lines = []
    _append_to_pb(lines, module, table, schema, pb_type, pb_schema)
    return lines
def ets_external(table, key):
    schema = utils.load_schema(table)
    if schema is None:
        return None
    lines = []
    _append_common_exports(lines, table, schema)
    _append_common_defines(lines, table, schema)
    _append_ets_defines(lines, table, schema)
    return lines
def sql_create_table(table):
    schema = utils.load_schema(table)
    if schema is None:
        return None
    lines = []
    _append_create_table(lines, table, schema)
    _append_primary_key(lines, table, schema)
    if 'autoIncrement' in schema:
        _append_auto_increment(lines, table, schema)
    return lines
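# A minimal usage sketch of sql_create_table() above; the 'player' table name,
# the 'player.sql' output path, and this helper's name are hypothetical
# placeholders. The generator returns a list of SQL source lines, or None when
# utils.load_schema() finds no schema for the table.
def _demo_sql_create_table():
    sql_lines = sql_create_table('player')  # placeholder table name
    if sql_lines is not None:
        with open('player.sql', 'w') as sql_file:  # placeholder output path
            sql_file.write('\n'.join(sql_lines) + '\n')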
def hrl_record(table, is_partial):
    is_partial = is_partial.strip() == 'true'
    schema = utils.load_schema(table)
    if schema is None:
        return None
    lines = []
    lines.append('%%%% %s basic info (when the database changes, this record must be updated too)'
                 % utils.get(schema, 'description', table))
    _hrl_record_with_schema(lines, table, schema, is_partial)
    return lines
def erl_db_accessor_extension(table, x, encode_decode='false'):
    schema = utils.load_schema(table)
    if schema is None:
        return None
    encode_decode = encode_decode.strip() == 'true'
    lines = []
    _append_accessor_export_common(lines, table, schema)
    _append_common_db_crud_export(lines, table, schema)
    _append_extension_db_crud_export(lines, table, x, schema)
    _append_accessor_common(lines, table, encode_decode, schema)
    _append_extension_db_crud(lines, table, x, schema)
    return lines
def erl_db_accessor_update_cache_fields_update(table):
    schema = utils.load_schema(table)
    if schema is None:
        return None
    lines = []
    lines.append('New_%s = %s#%s{' % (table.title(), table.title(), table))

    def get_line(field_name, is_last):
        has_comma = not is_last
        return ' %s = %s%s' % (field_name, field_name.title(), ',' if has_comma else '')

    utils.append_single(lines, table, schema['cache_fields'], get_line)
    lines.append('},')
    lines.append('ok = update(New_%s),' % table.title())
    lines.append('New_%s;' % table.title())
    return lines
def erl_db_accessor_update_cache_fields_header(table):
    schema = utils.load_schema(table)
    if schema is None:
        return None
    params = schema['primaryKey']
    if isinstance(params, list):
        params = ', '.join(['%s' % key.title() for key in params])
    else:
        params = params.title()
    lines = []
    lines.append('%% @doc Calculate the cache fields #%s' % table)
    lines.append('update_cache_fields(%s) ->' % params)
    lines.append(' case load(%s) of' % params)
    lines.append(' %s = #%s{' % (table.title(), table))

    def get_line_key(field_name, is_last):
        return ' %s = %s,' % (field_name, field_name.title())

    utils.append_single(lines, table, schema['cache_keys'], get_line_key)

    def get_line_field(field_name, is_last):
        has_comma = not is_last
        return ' %s = _Old_%s%s' % (field_name, field_name.title(), ',' if has_comma else '')

    utils.append_single(lines, table, schema['cache_fields'], get_line_field)
    lines.append(' } ->')
    return lines
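# Sketch of how the three update_cache_fields generators (the header above,
# the update body, and the footer defined earlier) might be stitched into one
# Erlang function clause. The 'item' table name and this helper's name are
# hypothetical placeholders; each generator returns None when the table has
# no schema, in which case the sketch also returns None.
def _demo_update_cache_fields(table='item'):
    parts = []
    for gen in (erl_db_accessor_update_cache_fields_header,
                erl_db_accessor_update_cache_fields_update,
                erl_db_accessor_update_cache_fields_footer):
        chunk = gen(table)
        if chunk is None:
            return None
        parts.extend(chunk)
    return '\n'.join(parts)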
from pyspark.sql import SparkSession

from utils import spark_schema_from_json, load_schema

spark = SparkSession.builder.master("spark://vm1:7077").appName(
    "Language_forks").getOrCreate()

db_schema = load_schema()

df_projects = spark.read.csv(
    # path="/home/hthuwal/Desktop/Ghtorrent_heads/projects.csv",
    # schema=spark_schema_from_json(db_schema["projects.csv"]),
    path="hdfs:/projects.csv",
    schema=spark_schema_from_json(db_schema["projects_new.csv"]),
    multiLine=True,
    nullValue="\\N",
)

# df = df_projects.where(df_projects.forked_from.isNotNull()).groupBy(df_projects.language).agg(sf.count().alias('num_forks')).sort(df_projects.num_forks.desc())
df_projects.createOrReplaceTempView("temp")

query = """
SELECT language, count(*) as num_forks
FROM temp
WHERE forked_from is NOT NULL AND language is NOT NULL
GROUP BY language
ORDER BY num_forks DESC
"""

df = spark.sql(query)
df.coalesce(1).write.json("Language_forks", mode="overwrite")

# spark-submit --master spark://vm1:7077 --deploy-mode client --executor-memory 3G --driver-memory 4G --conf spark.driver.maxResultSize=3g language_forks.py
def erl_mod_c2s_record_to_fields(table, record):
    schema = utils.load_schema(table)
    if schema is None:
        return None
    lines = []
    _append_record_to_fields_match(lines, table, schema, record)
    return lines
def erl_mod_c2s_helpers(table):
    schema = utils.load_schema(table)
    if schema is None:
        return None
    lines = []
    _append_records_to_lists(lines, table, schema)
    return lines